diff --git a/src/bufr/BufrParser/BufrParser.cpp b/src/bufr/BufrParser/BufrParser.cpp index eae792d27..4686012ae 100644 --- a/src/bufr/BufrParser/BufrParser.cpp +++ b/src/bufr/BufrParser/BufrParser.cpp @@ -66,9 +66,8 @@ namespace Ingester { { for (const auto& queryInfo : var->getQueryList()) { - auto resultBase = resultSet.get( - queryInfo.name, queryInfo.groupByField); - srcData[queryInfo.name] = DataObjectBase::fromResult(resultBase, queryInfo.query); + srcData[queryInfo.name] = resultSet.get( + queryInfo.name, queryInfo.groupByField, queryInfo.type); } } diff --git a/src/bufr/BufrParser/Exports/Export.cpp b/src/bufr/BufrParser/Exports/Export.cpp index 3453b2286..b421af3ad 100644 --- a/src/bufr/BufrParser/Exports/Export.cpp +++ b/src/bufr/BufrParser/Exports/Export.cpp @@ -32,9 +32,9 @@ namespace namespace Variable { const char* Datetime = "datetime"; - const char* Mnemonic = "mnemonic"; const char* Query = "query"; const char* GroupByField = "group_by"; // Deprecated + const char* Type = "type"; } // namespace Variable namespace Split @@ -125,13 +125,6 @@ namespace Ingester auto dtconf = subConf.getSubConfiguration(ConfKeys::Variable::Datetime); variable = std::make_shared(key, groupByField, dtconf); } - else if (subConf.has(ConfKeys::Variable::Mnemonic)) - { - std::ostringstream errMsg; - errMsg << "Obsolete format::exports::variable of type " << key << std::endl; - errMsg << "Use \"query:\" instead."; - throw eckit::BadParameter(errMsg.str()); - } else if (subConf.has(ConfKeys::Variable::Query)) { Transforms transforms = TransformBuilder::makeTransforms(subConf); @@ -146,9 +139,16 @@ namespace Ingester throw eckit::BadParameter(errMsg.str()); } + std::string type = ""; + if (subConf.has(ConfKeys::Variable::Type)) + { + type = subConf.getString(ConfKeys::Variable::Type); + } + variable = std::make_shared(key, query, groupByField, + type, transforms); } else diff --git a/src/bufr/BufrParser/Exports/Variables/QueryVariable.cpp b/src/bufr/BufrParser/Exports/Variables/QueryVariable.cpp index 7e06c1af0..b0033c748 100644 --- a/src/bufr/BufrParser/Exports/Variables/QueryVariable.cpp +++ b/src/bufr/BufrParser/Exports/Variables/QueryVariable.cpp @@ -19,10 +19,12 @@ namespace Ingester QueryVariable::QueryVariable(const std::string& exportName, const std::string& query, const std::string& groupByField, + const std::string& type, const Transforms& transforms) : Variable(exportName), query_(query), groupByField_(groupByField), + type_(type), transforms_(transforms) { initQueryMap(); @@ -56,6 +58,7 @@ namespace Ingester info.name = getExportName(); info.query = query_; info.groupByField = groupByField_; + info.type = type_; queries.push_back(info); return queries; diff --git a/src/bufr/BufrParser/Exports/Variables/QueryVariable.h b/src/bufr/BufrParser/Exports/Variables/QueryVariable.h index e900d38b9..f809bc6e8 100644 --- a/src/bufr/BufrParser/Exports/Variables/QueryVariable.h +++ b/src/bufr/BufrParser/Exports/Variables/QueryVariable.h @@ -28,6 +28,7 @@ namespace Ingester explicit QueryVariable(const std::string& exportName, const std::string& query, const std::string& groupByField, + const std::string& type, const Transforms& transforms); ~QueryVariable() final = default; @@ -46,6 +47,9 @@ namespace Ingester /// \brief The for field of interest std::string groupByField_; + /// \brief Optional type override string + std::string type_; + /// \brief Collection of transforms to apply to the data during export Transforms transforms_; }; diff --git a/src/bufr/BufrParser/Exports/Variables/Transforms/OffsetTransform.cpp b/src/bufr/BufrParser/Exports/Variables/Transforms/OffsetTransform.cpp index 826a79ec4..5b048f44a 100644 --- a/src/bufr/BufrParser/Exports/Variables/Transforms/OffsetTransform.cpp +++ b/src/bufr/BufrParser/Exports/Variables/Transforms/OffsetTransform.cpp @@ -19,16 +19,7 @@ namespace Ingester void OffsetTransform::apply(std::shared_ptr& dataObject) { - if (auto object = std::dynamic_pointer_cast>(dataObject)) - { - auto data = object->getRawData(); - for (auto& val : data) - { - if (val != bufr::MissingValue) val += offset_; - } - - object->setRawData(data); - } + dataObject->offsetBy(offset_); } } // namespace Ingester diff --git a/src/bufr/BufrParser/Exports/Variables/Transforms/ScalingTransform.cpp b/src/bufr/BufrParser/Exports/Variables/Transforms/ScalingTransform.cpp index 53e02e21b..c895acb96 100644 --- a/src/bufr/BufrParser/Exports/Variables/Transforms/ScalingTransform.cpp +++ b/src/bufr/BufrParser/Exports/Variables/Transforms/ScalingTransform.cpp @@ -18,15 +18,6 @@ namespace Ingester void ScalingTransform::apply(std::shared_ptr& dataObject) { - if (auto object = std::dynamic_pointer_cast>(dataObject)) - { - auto data = object->getRawData(); - for (auto& val : data) - { - if (val != bufr::MissingValue) val *= scaling_; - } - - object->setRawData(data); - } + dataObject->multiplyBy(scaling_); } } // namespace Ingester diff --git a/src/bufr/BufrParser/Exports/Variables/Variable.h b/src/bufr/BufrParser/Exports/Variables/Variable.h index ed55c40c7..1c82883da 100644 --- a/src/bufr/BufrParser/Exports/Variables/Variable.h +++ b/src/bufr/BufrParser/Exports/Variables/Variable.h @@ -21,6 +21,7 @@ namespace Ingester std::string name; std::string query; std::string groupByField; + std::string type; }; typedef std::string QueryName; diff --git a/src/bufr/BufrParser/Query/Constants.h b/src/bufr/BufrParser/Query/Constants.h index 8675672d8..b67dd6cf5 100644 --- a/src/bufr/BufrParser/Query/Constants.h +++ b/src/bufr/BufrParser/Query/Constants.h @@ -9,6 +9,6 @@ namespace Ingester { namespace bufr { /// \brief The missing data value for all BUFR data. - const float MissingValue = 10e10; + const double MissingValue = 10.0e10; } // Ingester } // bufr diff --git a/src/bufr/BufrParser/Query/DataProvider.cpp b/src/bufr/BufrParser/Query/DataProvider.cpp index b7b8a63f5..2506c6dbb 100644 --- a/src/bufr/BufrParser/Query/DataProvider.cpp +++ b/src/bufr/BufrParser/Query/DataProvider.cpp @@ -12,6 +12,8 @@ #include #include +#include "eckit/exception/Exceptions.h" + namespace { const char* Subset = "SUB"; @@ -32,6 +34,7 @@ namespace bufr { void DataProvider::updateData(int bufrLoc) { + bufrLoc_ = bufrLoc; int size = 0; int *intPtr = nullptr; double *dataPtr = nullptr; @@ -98,5 +101,60 @@ namespace bufr { { delete_table_data_f(); } + + TypeInfo DataProvider::getTypeInfo(FortranIdx idx) const + { + static const unsigned int UNIT_STR_LEN = 24; + static const unsigned int DESC_STR_LEN = 55; + + char unitCStr[UNIT_STR_LEN]; + char descCStr[DESC_STR_LEN]; + + int retVal; + TypeInfo info; + + nemdefs_f(fileUnit_, + getTag(idx).c_str(), + unitCStr, + UNIT_STR_LEN, + descCStr, + DESC_STR_LEN, + &retVal); + + if (retVal == 0) + { + // trim the unit string + auto unitStr = std::string(unitCStr); + size_t end = unitStr.find_last_not_of(" \n\r\t\f\v"); + unitStr = (end == std::string::npos) ? "" : unitStr.substr(0, end + 1); + info.unit = unitStr; + + // trim the unit string + auto descStr = std::string(descCStr); + end = descStr.find_last_not_of(" \n\r\t\f\v"); + descStr = (end == std::string::npos) ? "" : descStr.substr(0, end + 1); + info.description = descStr; + + int descriptor; + int table_idx; + char table_type; + + nemtab_f(bufrLoc_, + getTag(idx).c_str(), + &descriptor, + &table_type, + &table_idx); + + nemtbb_f(bufrLoc_, + table_idx, + unitCStr, + UNIT_STR_LEN, + &info.scale, + &info.reference, + &info.bits); + } + + return info; + } } // namespace bufr } // namespace Ingester diff --git a/src/bufr/BufrParser/Query/DataProvider.h b/src/bufr/BufrParser/Query/DataProvider.h index 75aad66dd..7635d8190 100644 --- a/src/bufr/BufrParser/Query/DataProvider.h +++ b/src/bufr/BufrParser/Query/DataProvider.h @@ -9,8 +9,8 @@ #include #include +#include #include - #include namespace Ingester{ @@ -31,11 +31,48 @@ namespace bufr { Character }; + struct TypeInfo + { + int scale = 0; + int reference = 0; + int bits = 0; + std::string unit; + std::string description; + + bool isString() const { return unit == "CCITT IA5"; } + bool isSigned() const + { + // To better support Fortran clients for the generated ObsGroups we will assume all + // fields are signed. Otherwise this code would be reference < 0. + return true; + } + bool isInteger() const { return scale <= 0; } + bool is64Bit() const + { + bool is64Bit; + if (isInteger() && !isSigned()) + { + is64Bit = (log2((pow(2, bits) - 1) / pow(10, scale) + reference) > 32); + } + else if (isInteger() && isSigned()) + { + is64Bit = (log2(fmax(-1 * reference, + (pow(2, bits - 1) - 1) / pow(10, scale) + reference) * 2) + 1 > 32); + } + else + { + is64Bit = false; + } + + return is64Bit; + } + }; + /// \brief Responsible for exposing the data found in a BUFR file in a C friendly way. class DataProvider { public: - DataProvider() = default; + explicit DataProvider(int fileUnit) : fileUnit_(fileUnit) {} ~DataProvider() = default; /// \brief Read the data from the BUFR interface for the current subset and reset the @@ -63,8 +100,10 @@ namespace bufr { inline FortranIdx getNVal() const { return nval_; } inline FortranIdx getInv(FortranIdx idx) const { return inv_[idx - 1]; } inline double getVal(FortranIdx idx) const { return val_[idx - 1]; } + TypeInfo getTypeInfo(FortranIdx idx) const; private: + int fileUnit_; std::string subset_; // Table data; @@ -78,6 +117,7 @@ namespace bufr { // Subset data int inode_; int nval_; + int bufrLoc_; gsl::span val_; gsl::span inv_; }; diff --git a/src/bufr/BufrParser/Query/File.cpp b/src/bufr/BufrParser/Query/File.cpp index d4236bf54..2684baf30 100644 --- a/src/bufr/BufrParser/Query/File.cpp +++ b/src/bufr/BufrParser/Query/File.cpp @@ -67,7 +67,7 @@ namespace bufr { int bufrLoc; int il, im; // throw away - auto dataProvider = DataProvider(); + auto dataProvider = DataProvider(fileUnit_); auto resultSet = ResultSet(querySet.names()); auto query = Query(querySet, resultSet, dataProvider); @@ -84,6 +84,8 @@ namespace bufr { if (next > 0 && ++messageNum >= next) break; } + resultSet.setTargets(query.getTargets()); + dataProvider.deleteData(); return resultSet; diff --git a/src/bufr/BufrParser/Query/Query.cpp b/src/bufr/BufrParser/Query/Query.cpp index 45179369b..af1b685ac 100644 --- a/src/bufr/BufrParser/Query/Query.cpp +++ b/src/bufr/BufrParser/Query/Query.cpp @@ -26,22 +26,22 @@ namespace bufr { Query::Query(const QuerySet &querySet, ResultSet &resultSet, const DataProvider &dataProvider) : - querySet_(querySet), - resultSet_(resultSet), - dataProvider_(dataProvider) { + querySet_(querySet), + resultSet_(resultSet), + dataProvider_(dataProvider) + { } void Query::query() { - std::shared_ptr> targets; + Targets targets; std::shared_ptr<__details::ProcessingMasks> masks; findTargets(targets, masks); - return collectData(targets, masks, resultSet_); + collectData(targets, masks, resultSet_); } - void Query::findTargets(std::shared_ptr> &targets, - std::shared_ptr<__details::ProcessingMasks> &masks) { - // Check if the target list for this subset is cached in the targetMap_ + void Query::findTargets(Targets &targets, std::shared_ptr<__details::ProcessingMasks> &masks) { + // Check if the target list for this subset is cached if (targetCache_.find(dataProvider_.getSubset()) != targetCache_.end()) { targets = targetCache_.at(dataProvider_.getSubset()); masks = maskCache_.at(dataProvider_.getSubset()); @@ -49,7 +49,6 @@ namespace bufr { } masks = std::make_shared<__details::ProcessingMasks>(); - targets = std::make_shared>(); size_t numNodes = dataProvider_.getIsc(dataProvider_.getInode()); @@ -63,20 +62,20 @@ namespace bufr { auto subQueries = QueryParser::splitMultiquery(queryStr); bool foundTarget = false; - __details::Target target; + std::shared_ptr target; for (size_t subQueryIdx = 0; subQueryIdx < subQueries.size(); ++subQueryIdx) { const std::string &subQuery = subQueries[subQueryIdx]; target = findTarget(queryName, subQuery); - if (target.nodeIds.size() > 0) { + if (target->nodeIds.size() > 0) { // Collect mask data - masks->valueNodeMask[target.nodeIds[0]] = true; - for (size_t pathIdx = 0; pathIdx < target.seqPath.size(); ++pathIdx) { - masks->pathNodeMask[target.seqPath[pathIdx]] = true; + masks->valueNodeMask[target->nodeIds[0]] = true; + for (size_t pathIdx = 0; pathIdx < target->seqPath.size(); ++pathIdx) { + masks->pathNodeMask[target->seqPath[pathIdx]] = true; } - targets->push_back(target); + targets.push_back(target); foundTarget = true; break; } @@ -84,7 +83,7 @@ namespace bufr { if (!foundTarget) { // Add the last missing target to the list - targets->push_back(target); + targets.push_back(target); oops::Log::warning() << "Warning: Query String " << queryStr << " didn't apply to subset " @@ -97,8 +96,8 @@ namespace bufr { maskCache_.insert({dataProvider_.getSubset(), masks}); } - __details::Target Query::findTarget(const std::string &targetName, - const std::string &query) const { + std::shared_ptr Query::findTarget(const std::string &targetName, + const std::string &query) const { std::string querySubset; std::vector mnemonics; int index; @@ -106,7 +105,6 @@ namespace bufr { QueryParser::splitQueryStr(query, querySubset, mnemonics, index); std::vector branches; - bool isString = false; std::vector targetNodes; std::vector seqPath; std::vector dimPaths; @@ -141,8 +139,6 @@ namespace bufr { dataProvider_.getTag(nodeIdx) == mnemonics.back()) { // We found a target targetNodes.push_back(nodeIdx); - isString = (dataProvider_.getItp(nodeIdx) == 3); - getDimInfo(branches, mnemonicCursor, dimPaths, dimIdxs); } @@ -211,19 +207,20 @@ namespace bufr { } } - auto target = __details::Target(); - target.name = targetName; - target.queryStr = query; - target.isString = isString; - target.seqPath = branches; - target.nodeIds = targetNodes; + auto target = std::make_shared(); + target->name = targetName; + target->queryStr = query; + target->seqPath = branches; + target->nodeIds = targetNodes; if (targetNodes.size() > 0) { - target.dimPaths = dimPaths; - target.exportDimIdxs = dimIdxs; + target->dimPaths = dimPaths; + target->exportDimIdxs = dimIdxs; + target->typeInfo = dataProvider_.getTypeInfo(targetNodes[0]); } else { - target.dimPaths = {"*"}; - target.exportDimIdxs = {0}; + target->dimPaths = {"*"}; + target->exportDimIdxs = {0}; + target->typeInfo = TypeInfo(); } return target; @@ -277,7 +274,7 @@ namespace bufr { } } - void Query::collectData(std::shared_ptr> targets, + void Query::collectData(Targets& targets, std::shared_ptr<__details::ProcessingMasks> masks, ResultSet &resultSet) const { std::vector currentPath; @@ -378,42 +375,22 @@ namespace bufr { } } - for (size_t targetIdx = 0; targetIdx < targets->size(); targetIdx++) { - const auto &targ = targets->at(targetIdx); + for (size_t targetIdx = 0; targetIdx < targets.size(); targetIdx++) { + const auto &targ = targets.at(targetIdx); auto &dataField = dataFrame.fieldAtIdx(targetIdx); - dataField.name = targ.name; - dataField.queryStr = targ.queryStr; - dataField.isString = targ.isString; - if (targ.isString) - resultSet.indicateFieldIsString(targetIdx); // Whole column is string. - dataField.dimPaths = targ.dimPaths; - dataField.seqPath.resize(targ.seqPath.size() + 1); - dataField.seqPath[0] = 1; - std::copy(targ.seqPath.begin(), - targ.seqPath.end(), - std::back_inserter(dataField.seqPath)); - dataField.exportDims = targ.exportDimIdxs; - - if (targ.nodeIds.size() == 0) { + dataField.target = targ; + + if (targ->nodeIds.size() == 0) { dataField.data = {MissingValue}; - dataField.missing = true; dataField.seqCounts = {{1}}; } else { - dataField.seqCounts.resize(targ.seqPath.size() + 1); + dataField.seqCounts.resize(targ->seqPath.size() + 1); dataField.seqCounts[0] = {1}; - for (size_t pathIdx = 0; pathIdx < targ.seqPath.size(); pathIdx++) { - dataField.seqCounts[pathIdx + 1] = dataTable[targ.seqPath[pathIdx] + 1].counts; - } - - if (resultSet.isFieldStr(targetIdx) != targ.isString) { - std::ostringstream errMsg; - errMsg << "Different subsets don't agree whether " << dataField.name - << "is a string or not (there is a type mismatch)."; - throw eckit::BadParameter(errMsg.str()); + for (size_t pathIdx = 0; pathIdx < targ->seqPath.size(); pathIdx++) { + dataField.seqCounts[pathIdx + 1] = dataTable[targ->seqPath[pathIdx] + 1].counts; } - dataField.data = dataTable[targ.nodeIds[0]].values; - if (dataField.data.size() == 0) dataField.missing = true; + dataField.data = dataTable[targ->nodeIds[0]].values; } } } diff --git a/src/bufr/BufrParser/Query/Query.h b/src/bufr/BufrParser/Query/Query.h index 27db12add..2075466f2 100644 --- a/src/bufr/BufrParser/Query/Query.h +++ b/src/bufr/BufrParser/Query/Query.h @@ -15,24 +15,12 @@ #include "QuerySet.h" #include "ResultSet.h" #include "DataProvider.h" +#include "Target.h" namespace Ingester { namespace bufr { namespace __details { - /// \brief The information or Meta data for a BUFR field whose data we wish to capture when - /// we execute a query. - /// \note Will be refactored to use the SubsetTable object. - struct Target { - std::string name; - std::string queryStr; - bool isString; - std::vector seqPath; - std::vector nodeIds; - std::vector dimPaths; - std::vector exportDimIdxs; - }; - /// \brief BUFR messages are indexed according to start and stop values that are dependant /// on the message itself (the indexing is a property of the message). This object allows /// lets you make an array where the indexing is offset with respect to the actual position @@ -73,14 +61,27 @@ namespace bufr { Query(const QuerySet& querySet, ResultSet& resultSet, const DataProvider& dataProvider); void query(); + Targets getTargets() + { + Targets targets; + for (auto& subset : targetCache_) + { + for (auto& target : subset.second) + { + targets.push_back(target); + } + } + return targets; + } + private: const QuerySet querySet_; ResultSet& resultSet_; const DataProvider& dataProvider_; - std::unordered_map>> - targetCache_; + std::unordered_map targetCache_; std::unordered_map> maskCache_; + std::unordered_map> unitCache_; /// \brief Look for the list of targets for the currently active BUFR message subset that @@ -88,15 +89,15 @@ namespace bufr { /// order to make the data collection more efficient. /// \param[in, out] targets The list of targets to populate. /// \param[in, out] masks The processing masks to populate. - void findTargets(std::shared_ptr>& targets, + void findTargets(Targets& targets, std::shared_ptr<__details::ProcessingMasks>& masks); /// \brief Find the target associated with a specific user provided query string. /// \param[in] targetName The name specified for the target. /// \param[in] query The query string to use. - __details::Target findTarget(const std::string& targetName, - const std::string& query) const; + std::shared_ptr findTarget(const std::string& targetName, + const std::string& query) const; /// \brief Does the node idx correspond to an element you'd find in a query string (repeat @@ -120,7 +121,7 @@ namespace bufr { /// \param[in] targets The list of targets to collect for this subset. /// \param[in] masks The processing masks to use. /// \param[in, out] resultSet The object used to store the accumulated collected data. - void collectData(std::shared_ptr> targets, + void collectData(Targets& targets, std::shared_ptr<__details::ProcessingMasks> masks, ResultSet& resultSet) const; }; diff --git a/src/bufr/BufrParser/Query/ResultSet.cpp b/src/bufr/BufrParser/Query/ResultSet.cpp index eda59c556..3b975c10c 100644 --- a/src/bufr/BufrParser/Query/ResultSet.cpp +++ b/src/bufr/BufrParser/Query/ResultSet.cpp @@ -7,8 +7,11 @@ #include "ResultSet.h" +#include "eckit/exception/Exceptions.h" + #include #include +#include #include "Constants.h" #include "VectorMath.h" @@ -26,82 +29,44 @@ namespace bufr { { } - std::shared_ptr ResultSet::get(const std::string& fieldName, - const std::string& groupByFieldName) const + std::shared_ptr + ResultSet::get(const std::string& fieldName, + const std::string& groupByFieldName, + const std::string& overrideType) const { std::vector data; std::vector dims; std::vector dimPaths; + TypeInfo info; getRawValues(fieldName, groupByFieldName, data, dims, - dimPaths); - - - std::shared_ptr result; - if (isString(fieldName)) - { - auto strData = std::vector(); - - const char* charPtr = reinterpret_cast(data.data()); - for (int row_idx = 0; row_idx < dims[0]; row_idx++) - { - if (data.data()[row_idx] != MissingValue) - { - std::string str = std::string( - charPtr + row_idx * sizeof(double), sizeof(double)); - - // trim trailing whitespace from str - str.erase(std::find_if(str.rbegin(), str.rend(), - [](char c){ return !std::isspace(c); }).base(), - str.end()); - - strData.push_back(str); - } - else - { - strData.push_back(""); - } - } - - auto strResult = std::make_shared>(); - strResult->field_name = fieldName; - strResult->group_by_field_name = groupByFieldName; - strResult->data = strData; - strResult->dims.push_back(dims[0]); - result = strResult; - } - else - { - // Compute product of dimensions - int tot_elements = 1; - for (const auto& dim : dims) - { - tot_elements *= dim; - } - - auto floatResult = std::make_shared>(); - floatResult->field_name = fieldName; - floatResult->group_by_field_name = groupByFieldName; - floatResult->data = std::vector(data.data(), data.data() + tot_elements); - floatResult->dims = dims; - result = floatResult; - } + dimPaths, + info); // Add dim path strings const char* ws = " \t\n\r\f\v"; + std::vector paths(dims.size()); for (size_t dimIdx = 0; dimIdx < dims.size(); dimIdx++) { auto path_str = dimPaths[dimIdx]; // Trim extra chars from the path str path_str.erase(path_str.find_last_not_of(ws) + 1); - result->dimPaths.push_back(path_str); + paths[dimIdx] = path_str; } - return result; + std::shared_ptr object = makeDataObject(fieldName, + groupByFieldName, + info, + overrideType, + data, + dims, + paths); + + return object; } @@ -115,7 +80,8 @@ namespace bufr { const std::string& groupByField, std::vector& data, std::vector& dims, - std::vector& dimPaths) const + std::vector& dimPaths, + TypeInfo& info) const { // Find the dims based on the largest sequence counts in the fields @@ -137,18 +103,19 @@ namespace bufr { } auto& targetField = dataFrames_[0].fieldAtIdx(targetFieldIdx); - dimPaths = targetField.dimPaths; + dimPaths = targetField.target->dimPaths; - exportDims = targetField.exportDims; + exportDims = targetField.target->exportDimIdxs; } for (auto& dataFrame : dataFrames_) { auto& targetField = dataFrame.fieldAtIdx(targetFieldIdx); - if (!targetField.dimPaths.empty() && dimPaths.size() < targetField.dimPaths.size()) + if (!targetField.target->dimPaths.empty() && + dimPaths.size() < targetField.target->dimPaths.size()) { - dimPaths = targetField.dimPaths; - exportDims = targetField.exportDims; + dimPaths = targetField.target->dimPaths; + exportDims = targetField.target->exportDimIdxs; } size_t dimsLen = targetField.seqCounts.size(); @@ -166,6 +133,16 @@ namespace bufr { } } + info.reference = std::min(info.reference, targetField.target->typeInfo.reference); + info.bits = std::max(info.bits, targetField.target->typeInfo.bits); + + if (std::abs(targetField.target->typeInfo.scale) > info.scale) + { + info.scale = targetField.target->typeInfo.scale; + } + + if (info.unit.empty()) info.unit = targetField.target->typeInfo.unit; + if (groupByField != "") { auto& groupByField = dataFrame.fieldAtIdx(groupByFieldIdx); @@ -173,7 +150,7 @@ namespace bufr { if (groupbyIdx > static_cast(dimsList.size())) { - dimPaths = {groupByField.dimPaths.back()}; + dimPaths = {groupByField.target->dimPaths.back()}; int groupbyElementsForFrame = 1; for (auto &seqCount : groupByField.seqCounts) @@ -189,11 +166,11 @@ namespace bufr { else { dimPaths = {}; - for (size_t targetIdx = groupByField.exportDims.size() - 1; - targetIdx < targetField.dimPaths.size(); + for (size_t targetIdx = groupByField.target->exportDimIdxs.size() - 1; + targetIdx < targetField.target->dimPaths.size(); ++targetIdx) { - dimPaths.push_back(targetField.dimPaths[targetIdx]); + dimPaths.push_back(targetField.target->dimPaths[targetIdx]); } } } @@ -277,7 +254,7 @@ namespace bufr { std::vector> frameData; auto& targetField = dataFrame.fieldAtIdx(targetFieldIdx); - if (!targetField.missing) { + if (!targetField.data.size() == 0) { getRowsForField(targetField, frameData, allDims, @@ -405,10 +382,130 @@ namespace bufr { } } - bool ResultSet::isString(const std::string& fieldName) const + std::string ResultSet::unit(const std::string& fieldName) const { auto fieldIdx = dataFrames_.front().fieldIndexForNodeNamed(fieldName); - return dataFrames_.front().fieldAtIdx(fieldIdx).isString; + return dataFrames_.front().fieldAtIdx(fieldIdx).target->unit; + } + + std::shared_ptr ResultSet::makeDataObject( + const std::string& fieldName, + const std::string& groupByFieldName, + TypeInfo& info, + const std::string& overrideType, + const std::vector data, + const std::vector dims, + const std::vector dimPaths) const + { + std::shared_ptr object; + if (overrideType.empty()) + { + object = objectByTypeInfo(info); + } + else + { + object = objectByType(overrideType); + + if ((overrideType == "string" && !info.isString()) || + (overrideType != "string" && info.isString())) + { + std::ostringstream errMsg; + errMsg << "Conversions between numbers and strings are not currently supported. "; + errMsg << "See the export definition for \"" << fieldName << "\"."; + throw eckit::BadParameter(errMsg.str()); + } + } + + object->setData(data, 10e10); + object->setDims(dims); + object->setFieldName(fieldName); + object->setGroupByFieldName(groupByFieldName); + object->setDimPaths(dimPaths); + + return object; + } + + std::shared_ptr ResultSet::objectByTypeInfo(TypeInfo &info) const + { + std::shared_ptr object; + + if (info.isString()) + { + object = std::make_shared>(); + } + else if (info.isInteger()) + { + if (info.isSigned()) + { + if (info.is64Bit()) + { + object = std::make_shared>(); + } + else + { + object = std::make_shared>(); + } + } + else + { + if (info.is64Bit()) + { + object = std::make_shared>(); + } + else + { + object = std::make_shared>(); + } + } + } + else + { + if (info.is64Bit()) + { + object = std::make_shared>(); + } + else + { + object = std::make_shared>(); + } + } + + return object; } + + std::shared_ptr ResultSet::objectByType(const std::string& overrideType) const + { + std::shared_ptr object; + + if (overrideType == "int" || overrideType == "int32") + { + object = std::make_shared>(); + } + else if (overrideType == "float") + { + object = std::make_shared>(); + } + else if (overrideType == "double") + { + object = std::make_shared>(); + } + else if (overrideType == "string") + { + object = std::make_shared>(); + } + else if (overrideType == "int64") + { + object = std::make_shared>(); + } + else + { + std::ostringstream errMsg; + errMsg << "Unknown or unsupported type " << overrideType << "."; + throw eckit::BadParameter(errMsg.str()); + } + + return object; + } + } // namespace bufr } // namespace Ingester diff --git a/src/bufr/BufrParser/Query/ResultSet.h b/src/bufr/BufrParser/Query/ResultSet.h index 756e955af..2ce61192f 100644 --- a/src/bufr/BufrParser/Query/ResultSet.h +++ b/src/bufr/BufrParser/Query/ResultSet.h @@ -11,8 +11,13 @@ #include #include #include +#include #include +#include "DataProvider.h" +#include "DataObject.h" +#include "Target.h" + namespace Ingester { namespace bufr { @@ -21,15 +26,9 @@ namespace bufr { /// results data. struct DataField { - std::string name; - std::string queryStr; - bool isString; - bool missing = false; + std::shared_ptr target; std::vector data; - std::vector seqPath; std::vector> seqCounts; - std::vector dimPaths; - std::vector exportDims; }; /// \brief Container for a "row" of data (all the collected data for a message subset)., with a @@ -58,7 +57,7 @@ namespace bufr { auto result = -1; for (size_t fieldIdx = 0; fieldIdx < fields_.size(); fieldIdx++) { - if (fields_[fieldIdx].name == name) + if (fields_[fieldIdx].target->name == name) { result = fieldIdx; break; @@ -72,39 +71,6 @@ namespace bufr { std::vector fields_; }; - - /// \brief The base class for all Results. - struct ResultBase - { - std::string field_name; - std::string group_by_field_name; - std::vector dims; - std::vector dimPaths; - std::unordered_map fieldIdxMap_; - - virtual ~ResultBase() {} - virtual void print(std::ostream &out = std::cout) = 0; - }; - - /// \brief The resulting data created by the ResultSet. - template - struct Result : ResultBase - { - typedef T value_type; - std::vector data; - - /// \brief Print the data to stdout. - void print(std::ostream &out = std::cout) final - { - std::cout << data.size() << std::endl; - for (auto val = data.cbegin(); val != data.cend(); ++val) - { - if (val != data.cbegin()) out << ", "; - out << *val; - } - } - }; - /// \brief This class acts as the container for all the data that is collected during the /// the BUFR querying process. Internally it arranges the data as DataFrames for each message /// subset observation. Each DataFrame contains a list of DataFields, one for each named element @@ -129,31 +95,22 @@ namespace bufr { /// optional groupByFieldName. /// \param fieldName The name of the field to get the data for. /// \param groupByFieldName The name of the field to group the data by. + /// \param overrideType The name of the override type to convert the data to. Possible + /// values are int, uint, int32, uint32, int64, uint64, float, double /// \return A Result object containing the data. - std::shared_ptr get(const std::string& fieldName, - const std::string& groupByFieldName = "") const; + std::shared_ptr + get(const std::string& fieldName, + const std::string& groupByFieldName = "", + const std::string& overrideType = "") const; /// \brief Adds a new DataFrame to the ResultSet and returns a reference to it. /// \return A reference to the new DataFrame. DataFrame& nextDataFrame(); - /// \brief Sets the first dataframe attribute to indicate that a DataField is the string - /// type. - /// \param fieldIdx The index of the field to set. - void indicateFieldIsString(int fieldIdx) - { - dataFrames_.front().fieldAtIdx(fieldIdx).isString = true; - } - - /// \brief Checks if a DataField is the string type. - /// \param fieldIdx The index of the field. - /// \return True if the field is the string type. - bool isFieldStr(int fieldIdx) - { - return dataFrames_.front().fieldAtIdx(fieldIdx).isString; - } + void setTargets(Targets targets) { targets_ = targets; } private: + Targets targets_; std::vector dataFrames_; std::vector names_; std::vector fieldWidths; @@ -165,11 +122,13 @@ namespace bufr { /// \param groupByFieldName The name of the field to group the data by. /// \param dims The size of the dimensions of the result data (any number of dimensions). /// \param dimPaths The dimensioning sub-query path strings. + /// \param info The meta data for the element. void getRawValues(const std::string& fieldName, const std::string& groupByField, std::vector& data, std::vector& dims, - std::vector& dimPaths) const; + std::vector& dimPaths, + TypeInfo& info) const; /// \brief Retrieves the data for the specified target field, one row per message subset. /// The dims are used to determine the filling pattern so that that the resulting data can @@ -185,7 +144,36 @@ namespace bufr { /// \brief Is the field a string field? /// \param fieldName The name of the field. - bool isString(const std::string& fieldName) const; + std::string unit(const std::string& fieldName) const; + + /// \brief Make an appropriate DataObject for the data considering all the META data + /// \param fieldName The name of the field to get the data for. + /// \param groupByFieldName The name of the field to group the data by. + /// \param info The meta data for the element. + /// \param overrideType The name of the override type to convert the data to. Possible + /// values are int, uint, int32, uint32, int64, uint64, float, double + /// \param data The data + /// \param dims The dimensioning information + /// \param dimPaths The sub-query path strings for each dimension. + /// \return A Result DataObject containing the data. + std::shared_ptr makeDataObject( + const std::string& fieldName, + const std::string& groupByFieldName, + TypeInfo& info, + const std::string& overrideType, + const std::vector data, + const std::vector dims, + const std::vector dimPaths) const; + + /// \brief Make an appropriate DataObject for data with the TypeInfo + /// \param info The meta data for the element. + /// \return A Result DataObject containing the data. + std::shared_ptr objectByTypeInfo(TypeInfo& info) const; + + /// \brief Make an appropriate DataObject for data with the override type + /// \param overrideType The meta data for the element. + /// \return A Result DataObject containing the data. + std::shared_ptr objectByType(const std::string& overrideType) const; }; } // namespace bufr } // namespace Ingester diff --git a/src/bufr/BufrParser/Query/SubsetTable.cpp b/src/bufr/BufrParser/Query/SubsetTable.cpp index 3fd7c7a4e..804ffed8f 100644 --- a/src/bufr/BufrParser/Query/SubsetTable.cpp +++ b/src/bufr/BufrParser/Query/SubsetTable.cpp @@ -111,6 +111,7 @@ namespace bufr { query->dimIdxs = dimPathIdxs(seqPath); query->idx = numElements; query->requiresIdx = (numElements > 1); + query->typeInfo = dataProvider_.getTypeInfo(nodeIdx); allQueries.push_back(query); foundQueryMap[mapKey(query->pathComponents, numElements)] = allQueries.back(); diff --git a/src/bufr/BufrParser/Query/SubsetTable.h b/src/bufr/BufrParser/Query/SubsetTable.h index 8d2c8f0f0..0f56b22a2 100644 --- a/src/bufr/BufrParser/Query/SubsetTable.h +++ b/src/bufr/BufrParser/Query/SubsetTable.h @@ -27,36 +27,9 @@ namespace bufr { std::vector dimIdxs; size_t idx; bool requiresIdx; + TypeInfo typeInfo; }; -// struct Node -// { -// Typ type; -// size_t nodeIdx; -// std::string mnemonic; -// }; -// -// struct Sequence : Node -// { -// std::vector> children = {}; -// }; -// -// struct FixedRep : Node -// { -// size_t size; -// Sequence sequence; -// }; -// -// struct DelayedRep : Node -// { -// Sequence sequence; -// }; -// -// struct Leaf : Node -// { -// std::string type; -// }; - /// \brief Parses the BUFR message subset Meta data tables. class SubsetTable { diff --git a/src/bufr/BufrParser/Query/Target.h b/src/bufr/BufrParser/Query/Target.h new file mode 100644 index 000000000..d2649ed65 --- /dev/null +++ b/src/bufr/BufrParser/Query/Target.h @@ -0,0 +1,34 @@ +/* + * (C) Copyright 2022 NOAA/NWS/NCEP/EMC + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + */ + +#pragma once + +#include +#include +#include + +#include "DataProvider.h" + +namespace Ingester { +namespace bufr { + /// \brief The information or Meta data for a BUFR field whose data we wish to capture when + /// we execute a query. + struct Target + { + std::string name; + std::string queryStr; + std::string unit; + std::vector seqPath; + std::vector nodeIds; + std::vector dimPaths; + std::vector exportDimIdxs; + TypeInfo typeInfo; + }; + + typedef std::vector> Targets; +} // namespace bufr +} // namespace Ingester diff --git a/src/bufr/CMakeLists.txt b/src/bufr/CMakeLists.txt index 6d85e9fe2..8d1f27042 100644 --- a/src/bufr/CMakeLists.txt +++ b/src/bufr/CMakeLists.txt @@ -49,6 +49,7 @@ list(APPEND _ingester_srcs BufrParser/Query/QueryParser.cpp BufrParser/Query/ResultSet.h BufrParser/Query/ResultSet.cpp + BufrParser/Query/Target.h IodaEncoder/IodaEncoder.cpp IodaEncoder/IodaEncoder.h IodaEncoder/IodaDescription.cpp diff --git a/src/bufr/DataObject.cpp b/src/bufr/DataObject.cpp index 16a239b0e..9c34ba1fb 100644 --- a/src/bufr/DataObject.cpp +++ b/src/bufr/DataObject.cpp @@ -14,35 +14,4 @@ namespace Ingester { - std::shared_ptr - DataObjectBase::fromResult(const std::shared_ptr& resultBase, - const std::string& query) - { - std::shared_ptr dataObject = nullptr; - - if (auto result = std::dynamic_pointer_cast>(resultBase)) - { - dataObject = std::make_shared> (result->data, - result->field_name, - result->group_by_field_name, - result->dims, - query, - result->dimPaths); - } - else if (auto result = std::dynamic_pointer_cast>(resultBase)) - { - dataObject = std::make_shared> (result->data, - result->field_name, - result->group_by_field_name, - result->dims, - query, - result->dimPaths); - } - else - { - throw eckit::BadParameter("Encountered unsupported Result Type."); - } - - return dataObject; - } } // namespace Ingester diff --git a/src/bufr/DataObject.h b/src/bufr/DataObject.h index eb36e6910..57aff3158 100644 --- a/src/bufr/DataObject.h +++ b/src/bufr/DataObject.h @@ -18,7 +18,6 @@ #include "ioda/defs.h" #include "BufrParser/Query/Constants.h" -#include "BufrParser/Query/ResultSet.h" namespace Ingester { @@ -30,10 +29,6 @@ namespace Ingester class DataObjectBase { public: - static std::shared_ptr - fromResult(const std::shared_ptr& resultBase, - const std::string& query); - explicit DataObjectBase(const std::string& fieldName, const std::string& groupByFieldName, const Dimensions& dims, @@ -47,8 +42,17 @@ namespace Ingester dimPaths_(dimPaths) {}; + DataObjectBase() = default; virtual ~DataObjectBase() = default; + // Setters + void setFieldName(const std::string& fieldName) { fieldName_ = fieldName; } + void setGroupByFieldName(const std::string& fieldName) { groupByFieldName_ = fieldName; } + void setDims(const std::vector dims) { dims_ = dims; } + void setQuery(const std::string& query) { query_ = query; } + void setDimPaths(const std::vector& dimPaths) { dimPaths_ = dimPaths; } + virtual void setData(const std::vector& data, double dataMissingValue) = 0; + // Getters std::string getFieldName() const { return fieldName_; } std::string getGroupByFieldName() const { return groupByFieldName_; } @@ -98,6 +102,14 @@ namespace Ingester virtual std::shared_ptr slice(const std::vector& rows) const = 0; + /// \brief Multiply the stored values in this data object by a scalar. + /// \param val Scalar to multiply to the data.. + virtual void multiplyBy(double val) = 0; + + /// \brief Add a scalar to the stored values in this data object. + /// \param val Scalar to add to the data.. + virtual void offsetBy(double val) = 0; + protected: std::string fieldName_; std::string groupByFieldName_; @@ -112,9 +124,12 @@ namespace Ingester { public: typedef T value_type; + constexpr T missingValue() const { return std::numeric_limits::max(); } /// \brief Constructor. /// \param dimensions The dimensions of the data object. + DataObject() = default; + DataObject(const std::vector& data, const std::string& field_name, const std::string& group_by_field_name, @@ -127,6 +142,18 @@ namespace Ingester ~DataObject() = default; + /// \brief Set the data for this object + /// \param data The data vector + void setData(const std::vector& data) { data_ = data; } + + /// \brief Set the data for this object + /// \param data The data vector + /// \param dataMissingValue The missing value used in the raw data + void setData(const std::vector& data, double dataMissingValue) final + { + _setData(data, dataMissingValue); + } + /// \brief Makes an ioda::Variable and adds it to the given ioda::ObsGroup /// \param obsGroup Obsgroup were to add the variable /// \param name The name to associate with the variable (ex "latitude@MetaData") @@ -277,7 +304,7 @@ namespace Ingester params.chunk = true; params.chunks = chunks; params.compressWithGZIP(compressionLevel); - params.setFillValue(static_cast(bufr::MissingValue)); + params.setFillValue(static_cast(missingValue())); return params; } @@ -372,6 +399,131 @@ namespace Ingester typename std::enable_if::value, U>::type* = nullptr) const { throw std::runtime_error("The stored value was is not a number"); + return 0.0f; + } + + /// \brief Set the data associated with this data object (numeric DataObject). + /// \param data - double vector of raw data + /// \param dataMissingValue - The number that represents missing values within the raw data + template + void _setData(const std::vector& data, + double dataMissingValue, + typename std::enable_if::value, U>::type* = nullptr) + { + data_ = std::vector(data.begin(), data.end()); + std::replace(data_.begin(), + data_.end(), + static_cast(dataMissingValue), + missingValue()); + } + + /// \brief Set the data associated with this data object (string DataObject). + /// \param data - double vector of raw data + /// \param dataMissingValue - The number that represents missing values within the raw data + template + void _setData( + const std::vector& data, + double dataMissingValue, + typename std::enable_if::value, U>::type* = nullptr) + { + data_ = std::vector(); + auto charPtr = reinterpret_cast(data.data()); + for (size_t row_idx = 0; row_idx < data.size(); row_idx++) + { + if (data[row_idx] != dataMissingValue) + { + std::string str = std::string( + charPtr + row_idx * sizeof(double), sizeof(double)); + + // trim trailing whitespace from str + str.erase(std::find_if(str.rbegin(), str.rend(), + [](char c){ return !std::isspace(c); }).base(), + str.end()); + + data_.push_back(str); + } + else + { + data_.push_back(""); + } + } + } + + /// \brief Multiply the stored values in this data object by a scalar. + /// \param val Scalar to multiply to the data.. + void multiplyBy(double val) final + { + _multiplyBy(val); + } + + /// \brief Multiply the stored values in this data object by a scalar (numeric version). + /// \param val Scalar to multiply to the data. + template + void _multiplyBy(double val, + typename std::enable_if::value, U>::type* = nullptr) + { + if (typeid(T) == typeid(float) || // NOLINT + typeid(T) == typeid(double) || // NOLINT + trunc(val) == val) + { + for (size_t i = 0; i < data_.size(); i++) + { + if (data_[i] != missingValue()) + { + data_[i] = static_cast(static_cast(data_[i]) * val); + } + } + } + else + { + std::ostringstream str; + str << "Multiplying integer field \"" << fieldName_ << "\" with a non-integer is "; + str << "illegal. Please convert it to a float or double."; + throw std::runtime_error(str.str()); + } + } + + /// \brief Multiply the stored values in this data object by a scalar (string version). + /// \param val Scalar to multiply to the data. + template + void _multiplyBy( + double val, + typename std::enable_if::value, U>::type* = nullptr) + { + throw std::runtime_error("Trying to multiply a string by a number"); + } + + /// \brief Add a scalar to the stored values in this data object. + /// \param val Scalar to add to the data. + void offsetBy(double val) final + { + _offsetBy(val); + } + + + /// \brief Add a scalar to the stored values in this data object (numeric version). + /// \param val Scalar to add to the data. + template + void _offsetBy(double val, + typename std::enable_if::value, U>::type* = nullptr) + { + for (size_t i = 0; i < data_.size(); i++) + { + if (data_[i] != missingValue()) + { + data_[i] = data_[i] + static_cast(val); + } + } + } + + /// \brief Add a scalar to the stored values in this data object (string version). + /// \param val Scalar to add to the data. + template + void _offsetBy( + double val, + typename std::enable_if::value, U>::type* = nullptr) + { + throw std::runtime_error("Trying to offset a string by a number"); } }; } // namespace Ingester diff --git a/src/bufr/README.md b/src/bufr/README.md index 7dd8bdf4e..7fbff8c0a 100644 --- a/src/bufr/README.md +++ b/src/bufr/README.md @@ -67,6 +67,7 @@ Defines how to read data from the input BUFR file. Its sections are as follows: hoursFromUtc: 0 # Optional satellite_id: query: "*/SAID" + type: int64 longitude: query: "*/CLON" transforms: @@ -102,7 +103,8 @@ ioda encoder. It has the following sections: * **values** (One of these types): * `query` Query string which is used to get the data from the BUFR file. _(optional)_ Can apply a list of `tranforms` to the numeric (not string) data. Possible transforms are - `offset` and `scale`. + `offset` and `scale`. You can also manually override the type by specifying the `type` as + **int**, **int64**, **float**, or **double**. * `datetime` Associate **key** with data for mnemonics for `year`, `month`, `day`, `hour`, `minute`, _(optional)_ `second`, and _(optional)_ `hoursFromUtc` (must be an **integer**). Internally, the value stored is number of seconds elapsed since a reference epoch, currently @@ -182,8 +184,8 @@ The `ioda` section defines the ObsGroup objects that will be created. replaced with the relevant split category ID for that file to form a unique name for every file. * `dimensions` used to define dimension information in variables * `name` arbitrary name for the dimension - * `paths` - list of subqueries for that dimension (different paths for different BUFR subsets only) **or** `path` Single subquery for that dimension ex: - **\*/BRITCSTC** + * `paths` - list of subqueries for that dimension (different paths for different BUFR subsets + only) **or** `path` Single subquery for that dimension ex: **\*/BRITCSTC** * `variables` List of output variable objects to create. * `name` standardized pathname **group**/**var_name**. * **var_name** name for the variable diff --git a/test/testinput/ADPUPA.prepbufr b/test/testinput/ADPUPA.prepbufr index bd1c94867..c95db4987 100644 Binary files a/test/testinput/ADPUPA.prepbufr and b/test/testinput/ADPUPA.prepbufr differ diff --git a/test/testinput/adpupa_prepbufr.yaml b/test/testinput/adpupa_prepbufr.yaml index a654251b9..e815d61c4 100644 --- a/test/testinput/adpupa_prepbufr.yaml +++ b/test/testinput/adpupa_prepbufr.yaml @@ -68,6 +68,7 @@ observations: specificHumidity: query: "*/PRSLEVEL/Q___INFO/Q__EVENT/QOB" + type: float transforms: - scale: 0.000001 specificHumidityQM: diff --git a/test/testinput/bufr_ncep_adpsfc.yaml b/test/testinput/bufr_ncep_adpsfc.yaml index 43344ed66..571cd5fba 100644 --- a/test/testinput/bufr_ncep_adpsfc.yaml +++ b/test/testinput/bufr_ncep_adpsfc.yaml @@ -34,18 +34,23 @@ observations: # ObsValue pressure: query: "*/PRES" + type: float pressureReducedToMeanSeaLevel: query: "*/PMSL" - altimeterSettingQnh: + type: float + altimeterSetting: query: "*/ALSE" + type: float airTemperature: query: "*/TMDB" dewpointTemperature: query: "*/TMDP" windDirection: query: "*/WDIR" + type: float windSpeed: query: "*/WSPD" + type: float # ObsValue - ocean # DBSS (depth below water surface) not found @@ -58,6 +63,7 @@ observations: # CEILING (cloud ceiling) not found cloudCoverTotal: query: "*/TOCC" + type: float transforms: - scale: 0.01 cloudAmountDescription: @@ -66,12 +72,15 @@ observations: query: "*/HBLCS" heightOfBaseOfCloud: query: "[*/CLDSQ1/HOCB, */MTRCLD/HOCB, NC000100/HOCB, NC000101/HOCB, NC000102/HOCB]" + type: float verticalSignificanceSurfaceObservations: query: "[*/CLDSQ1/VSSO, */MTRCLD/VSSO, NC000100/VSSO[1], NC000101/VSSO[1], NC000102/VSSO[1]]" verticalVisibility: query: "[*/RPSEC3/VTVI, */VTVI]" + type: float horizontalVisibility: query: "*/HOVI" + type: float minimumTemperature: query: "[*/TMPSQ3/MITM, */MTTPSQ/MITM, NC000100/BSYEXTM/MITM[1], NC000101/BSYEXTM/MITM[1], NC000102/BSYEXTM/MITM[1]]" maximumTemperature: @@ -88,6 +97,9 @@ observations: query: "*/QMDD" pressureQM: query: "*/QMPR" + windQM: + query: "*/QMWN" + ioda: backend: netcdf @@ -143,10 +155,11 @@ observations: longName: "Method of Water Temperature Measurement" # ObsValue - - name: "ObsValue/altimeterSettingQnh" + - name: "ObsValue/altimeterSetting" coordinates: "longitude latitude" - source: variables/altimeterSettingQnh - longName: "Altimeter Setting Qnh" + source: variables/altimeterSetting + longName: "Altimeter Setting" + units: "Pa" - name: "ObsValue/pressure" coordinates: "longitude latitude" @@ -203,6 +216,7 @@ observations: source: variables/cloudCoverTotal longName: "Total Cloud Coverage" units: "1" + - name: "ObsValue/cloudAmountDescription" coordinates: "longitude latitude" source: variables/cloudAmountDescription @@ -212,7 +226,6 @@ observations: coordinates: "longitude latitude" source: variables/heightAboveSurfaceOfBaseOfLowestCloud longName: "Height Above Surface of Base of Lowest Cloud Seen" - units: "m" - name: "ObsValue/heightOfBaseOfCloud" coordinates: "longitude latitude" @@ -275,3 +288,9 @@ observations: coordinates: "longitude latitude" source: variables/pressureQM longName: "Quality Indicator for Pressure" + + - name: "QualityMarker/windDirection" + coordinates: "longitude latitude" + source: variables/windQM + longName: "Quality Indicator for Wind Direction" + diff --git a/test/testinput/bufr_ncep_mtiasi.yaml b/test/testinput/bufr_ncep_mtiasi.yaml index da2e17314..038a17652 100755 --- a/test/testinput/bufr_ncep_mtiasi.yaml +++ b/test/testinput/bufr_ncep_mtiasi.yaml @@ -73,6 +73,7 @@ observations: fractionOfClearPixelsInFov: query: "*/IASIL1CS/FCPH" + type: float transforms: - scale: 0.01 diff --git a/test/testinput/bufr_ncep_prepbufr_adpsfc.yaml b/test/testinput/bufr_ncep_prepbufr_adpsfc.yaml index c9f26fade..5ba6f514f 100644 --- a/test/testinput/bufr_ncep_prepbufr_adpsfc.yaml +++ b/test/testinput/bufr_ncep_prepbufr_adpsfc.yaml @@ -28,12 +28,14 @@ observations: query: "*/SID" stationElevation: query: "*/ELV" + type: float waterTemperatureMethod: query: "*/SST_INFO/MSST" # ObsValue heightOfObservation: query: "*/Z___INFO/Z__EVENT/ZOB" + type: float pressure: query: "*/P___INFO/P__EVENT/POB" transforms: @@ -51,6 +53,7 @@ observations: transforms: - offset: 273.15 specificHumidity: + type: float query: "*/Q___INFO/Q__EVENT/QOB" transforms: - scale: 0.000001 @@ -64,29 +67,35 @@ observations: query: "*/SST_INFO/SSTEVENT/SST1" heightOfWaves: query: "*/WAVE_SEQ/HOWV" + type: float depthBelowWaterSurface: query: "*/SST_INFO/DBSS_SEQ/DBSS" - + type: float # ObsValue - cloud, cloud ceiling, visibility, gust wind, min/max temperature, weather # note: cloud ceiling is a derivative of HOCB, the height of cloud base cloudCoverTotal: query: "*/CLOU2SEQ/TOCC" + type: float transforms: - scale: 0.01 cloudAmountDescription: query: "*/CLOUDSEQ/CLAM" cloudCeiling: query: "*/CLOU3SEQ/CEILING" + type: float heightAboveSurfaceOfBaseOfLowestCloud: query: "*/CLOU2SEQ/HBLCS" heightOfBaseOfCloud: query: "*/CLOUDSEQ/HOCB" + type: float verticalSignificanceSurfaceObservations: query: "*/CLOUDSEQ/VSSO" verticalVisibility: query: "*/VISB1SEQ/VTVI_SEQ/VTVI" + type: float horizontalVisibility: query: "*/VISB1SEQ/HOVI" + type: float minimumTemperature: query: "*/TMXMNSEQ/MITM" maximumTemperature: @@ -298,7 +307,6 @@ observations: coordinates: "longitude latitude" source: variables/heightAboveSurfaceOfBaseOfLowestCloud longName: "Height above Surface of Base of Lowest Cloud Seen" - units: "m" - name: "ObsValue/heightOfBaseOfCloud" coordinates: "longitude latitude" diff --git a/test/testinput/bufr_ncep_sevcsr.yaml b/test/testinput/bufr_ncep_sevcsr.yaml index bdb2d0d36..93cfaf048 100644 --- a/test/testinput/bufr_ncep_sevcsr.yaml +++ b/test/testinput/bufr_ncep_sevcsr.yaml @@ -30,6 +30,7 @@ observations: query: "*/SAZA" cloudAmount: query: "*/RPSEQ7/CLDMNT" + type: float transforms: - scale: 0.01 brightnessTemperature: diff --git a/test/testinput/gdas.t12z.adpsfc.prepbufr b/test/testinput/gdas.t12z.adpsfc.prepbufr index 1babc02c2..83cc56d27 100644 Binary files a/test/testinput/gdas.t12z.adpsfc.prepbufr and b/test/testinput/gdas.t12z.adpsfc.prepbufr differ diff --git a/test/testoutput/NC005031.nc b/test/testoutput/NC005031.nc index 068a3607a..4c40934c2 100644 --- a/test/testoutput/NC005031.nc +++ b/test/testoutput/NC005031.nc @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3d65920f8a882d9d675037bb7787bde8d9d064b72a92b726017575722d60389a -size 216919 +oid sha256:b37f6c880046f75137cc0398aba6b7e78a0a4a91e6389c20722d9e743ecc340d +size 215757 diff --git a/test/testoutput/NC005066.nc b/test/testoutput/NC005066.nc index 2d9b837ff..5aee04f05 100644 --- a/test/testoutput/NC005066.nc +++ b/test/testoutput/NC005066.nc @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e94be48f4b60a1ed6f752ce5d9dc5431613a0ffd49e437e997f18dd84e6e9d19 -size 129692 +oid sha256:997931919211bf20a19b0711d3e24e6d46ed41e40b724d1034487358d8e8e829 +size 129007 diff --git a/test/testoutput/adpupa_prepbufr.nc b/test/testoutput/adpupa_prepbufr.nc index 8eb9293f5..57b5140f0 100644 --- a/test/testoutput/adpupa_prepbufr.nc +++ b/test/testoutput/adpupa_prepbufr.nc @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d52794d95f5330d1073536df16f145c05adb12f130656c4fd1dc58ccc56e412f -size 376184 +oid sha256:446fed0b0bdf5390f97015ac0c2327f75ac8b459aff830d09c4febd60673d0cd +size 378425 diff --git a/test/testoutput/bufr_empty_fields.nc b/test/testoutput/bufr_empty_fields.nc index a94078f8b..5d657f386 100644 --- a/test/testoutput/bufr_empty_fields.nc +++ b/test/testoutput/bufr_empty_fields.nc @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8bd03e06a1270f24216de2c9bc0005741ec3147f4f90a3d7c076655d68f4f5fc -size 24647 +oid sha256:9dae902e3517280891dbb4fbdf39f7d47ec72c066daedaac438f1520fa9de17f +size 24615 diff --git a/test/testoutput/bufr_read_2_dim_blocks.nc b/test/testoutput/bufr_read_2_dim_blocks.nc index 18b92eb08..a8d59bf31 100644 --- a/test/testoutput/bufr_read_2_dim_blocks.nc +++ b/test/testoutput/bufr_read_2_dim_blocks.nc @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3b8d11132d96977e0326b88a7068983ad1ebe10dcd58a344fcd95a4d9473450c -size 228754 +oid sha256:d04f07940271c4f098c9b8dea1da0e0c8dc71480bd86f83d91e4949d80ad0cbb +size 227558 diff --git a/test/testoutput/bufr_read_wmo_radiosonde.nc b/test/testoutput/bufr_read_wmo_radiosonde.nc index a95e2d886..fa1b2f41f 100644 --- a/test/testoutput/bufr_read_wmo_radiosonde.nc +++ b/test/testoutput/bufr_read_wmo_radiosonde.nc @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a007fa98ffddb93e854a096fa3e5c90d7ed271471ceccd6a4a5a51aea3ec168f -size 131081 +oid sha256:b362fa9a8366a099cf76b15721a0018c23843c17256728b9868b2a50764ee3c1 +size 130942 diff --git a/test/testoutput/bufr_sfcshp.nc b/test/testoutput/bufr_sfcshp.nc index b29efa4e1..a22589020 100644 --- a/test/testoutput/bufr_sfcshp.nc +++ b/test/testoutput/bufr_sfcshp.nc @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:77a32006c6ef07a05448b13143d670c019a756025d4f270a65a96f112f7161aa -size 484029 +oid sha256:0b22b5a6a471fe7f40a45f70f82368798e57482dee2a9f72a0dfa7ed0453ae38 +size 484030 diff --git a/test/testoutput/bufr_simple_groupby.nc b/test/testoutput/bufr_simple_groupby.nc index 352b6d830..cb27ee167 100644 --- a/test/testoutput/bufr_simple_groupby.nc +++ b/test/testoutput/bufr_simple_groupby.nc @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a809b124817045f376d50b72f4a6dad0bd90803961c32570ab45dfd51fdf4dc9 +oid sha256:9450b5569481b60f7408568e41174b33926e7777ad43ebb338eea5025bb5bcb8 size 48992 diff --git a/test/testoutput/gdas.t00z.1bhrs4.tm00.nc b/test/testoutput/gdas.t00z.1bhrs4.tm00.nc index 180169a7f..f793b4632 100644 --- a/test/testoutput/gdas.t00z.1bhrs4.tm00.nc +++ b/test/testoutput/gdas.t00z.1bhrs4.tm00.nc @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cc22cd42da1b70cdaef255aaab10eb298003460055da5d357627082287ad4453 -size 653435 +oid sha256:f2e3363afc3d34660d95864f81d529e3f840fe035ea1413e4cac1f0a5736c6fb +size 653448 diff --git a/test/testoutput/gdas.t00z.sevcsr.tm00.nc b/test/testoutput/gdas.t00z.sevcsr.tm00.nc index 92bbbb0c6..4cb60869a 100644 --- a/test/testoutput/gdas.t00z.sevcsr.tm00.nc +++ b/test/testoutput/gdas.t00z.sevcsr.tm00.nc @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b043909d923722fe0ad6cf560fdf8e812f53a85a3dc8c4c920c2e984dd073469 -size 56561 +oid sha256:8f323a107a1d9758c0222249aafe80f00eef792ced712786074b4381766cab75 +size 56554 diff --git a/test/testoutput/gdas.t06z.adpsfc_snow.tm00.nc b/test/testoutput/gdas.t06z.adpsfc_snow.tm00.nc index 0928073e4..2f92a645d 100644 --- a/test/testoutput/gdas.t06z.adpsfc_snow.tm00.nc +++ b/test/testoutput/gdas.t06z.adpsfc_snow.tm00.nc @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d28d8d6c42b7649008f6d0ee6abd0c1a4c0aff65a410ccd3cd8fa8f00d9a7f5f -size 3989692 +oid sha256:7d28e3a6cefe1d5a9cc07e84d6c9db61f27ade0648600083d793b41c777a5264 +size 3986529 diff --git a/test/testoutput/gdas.t12z.1bamua.metop-c.tm00.nc b/test/testoutput/gdas.t12z.1bamua.metop-c.tm00.nc index 662fee60e..b58fed637 100644 --- a/test/testoutput/gdas.t12z.1bamua.metop-c.tm00.nc +++ b/test/testoutput/gdas.t12z.1bamua.metop-c.tm00.nc @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:401bc36e8f3a04196cfa25a9e149d948cbcabe282ccd88c15b71dc1c01f17ff6 -size 101425 +oid sha256:e0d644d49291e31ff7b63f90e4e35edcea50cf3f8bcb384f3ebaaf552af44d3c +size 101448 diff --git a/test/testoutput/gdas.t12z.1bamua.noaa-15.tm00.nc b/test/testoutput/gdas.t12z.1bamua.noaa-15.tm00.nc index fddb781bf..92b65a378 100644 --- a/test/testoutput/gdas.t12z.1bamua.noaa-15.tm00.nc +++ b/test/testoutput/gdas.t12z.1bamua.noaa-15.tm00.nc @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c4670037db5f9924d4118051097207c14cbddb9f06a7587040febea135a85167 -size 88711 +oid sha256:301b039f7c7c3289f4d18e61c53d7b93918f0034eac23289b3c5dd0dd15767a5 +size 88731 diff --git a/test/testoutput/gdas.t12z.1bmhs.metop-b.tm00.nc b/test/testoutput/gdas.t12z.1bmhs.metop-b.tm00.nc index 19bc141f3..8bd59bbe6 100644 --- a/test/testoutput/gdas.t12z.1bmhs.metop-b.tm00.nc +++ b/test/testoutput/gdas.t12z.1bmhs.metop-b.tm00.nc @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6e4c3cb5c760dab260ac505c6bfa85a7679508e70b02a101941c21280f08d429 -size 158071 +oid sha256:a179d4371fe9af7779d20ec66ba35202d2e2e4d175c7a2402f9d1c82d01b677f +size 158092 diff --git a/test/testoutput/gdas.t12z.adpsfc.prepbufr.nc b/test/testoutput/gdas.t12z.adpsfc.prepbufr.nc index d57e9f3ed..b2769670e 100644 --- a/test/testoutput/gdas.t12z.adpsfc.prepbufr.nc +++ b/test/testoutput/gdas.t12z.adpsfc.prepbufr.nc @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8fff95d791b5c039dd00735b215be31f7ea0f1589620c0cb01a02c206c03b550 -size 285162 +oid sha256:af465f2d17058f406a51242dcb3c6ba506532d6643061a7c1e54654e5b66cdf7 +size 284915 diff --git a/test/testoutput/gdas.t12z.adpsfc.tm00.nc b/test/testoutput/gdas.t12z.adpsfc.tm00.nc index e1438ebbf..9db35373a 100644 --- a/test/testoutput/gdas.t12z.adpsfc.tm00.nc +++ b/test/testoutput/gdas.t12z.adpsfc.tm00.nc @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7d58b8c1df7ee2f6ecf808865c0ef892388949e5fae44a0f7a120fad86954400 -size 325767 +oid sha256:f15b99c2ba2c41fea253e56aa4a289696966453a265c634dd0a8cd40d34a54eb +size 328237 diff --git a/test/testoutput/gdas.t12z.esamua.noaa-18.tm00.nc b/test/testoutput/gdas.t12z.esamua.noaa-18.tm00.nc index 7964d99dc..9219b4b53 100644 --- a/test/testoutput/gdas.t12z.esamua.noaa-18.tm00.nc +++ b/test/testoutput/gdas.t12z.esamua.noaa-18.tm00.nc @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5e18178330b76bce05c18df36891fc08c0b62dcb751de0b06366ffc4dc28bafb -size 72606 +oid sha256:22ea0481d12249440b7041c4d95d3786f055d2ceb4fcd0ecfc73d98217877c20 +size 72442 diff --git a/test/testoutput/gdas.t12z.esmhs.noaa-19.tm00.nc b/test/testoutput/gdas.t12z.esmhs.noaa-19.tm00.nc index df0ac64b0..1ac9f1efc 100644 --- a/test/testoutput/gdas.t12z.esmhs.noaa-19.tm00.nc +++ b/test/testoutput/gdas.t12z.esmhs.noaa-19.tm00.nc @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a11b540f6da831848535a548cb49e38772e0154816741cdcaebcf13a18400a84 -size 158226 +oid sha256:1a806f9d58bca1684a96a846b76aa18b1d44d01941591412aec28a3d6d94fd3b +size 158064 diff --git a/test/testoutput/gdas.t12z.mtiasi.metop-c.tm00.nc b/test/testoutput/gdas.t12z.mtiasi.metop-c.tm00.nc index b22fbe948..d9f111e3b 100644 --- a/test/testoutput/gdas.t12z.mtiasi.metop-c.tm00.nc +++ b/test/testoutput/gdas.t12z.mtiasi.metop-c.tm00.nc @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e5be136c7ecf703584c48f817612fea4dc74314cbf976690d9a6a039ef69faa0 -size 552876 +oid sha256:9ab3e5c11041d73a2d56748f6e72b57df3cf4a2a67ff80287dac40a012846735 +size 545926 diff --git a/test/testoutput/gdas.t18z.1bmhs.tm00.15.7.filter_split.nc b/test/testoutput/gdas.t18z.1bmhs.tm00.15.7.filter_split.nc index a89e9e6c7..0d3db7f22 100644 --- a/test/testoutput/gdas.t18z.1bmhs.tm00.15.7.filter_split.nc +++ b/test/testoutput/gdas.t18z.1bmhs.tm00.15.7.filter_split.nc @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0e3334e007b90a28a10256b4ba7cc54e1a378fcd11a1ed229d786aa19658dbfc -size 57914 +oid sha256:fb2278b9c7b894e243efd7c3705c59ec0b29e20dfafe6c516c33ed6438b83ab5 +size 57916 diff --git a/test/testoutput/gdas.t18z.1bmhs.tm00.15.seven.split.nc b/test/testoutput/gdas.t18z.1bmhs.tm00.15.seven.split.nc index 58ebe711c..4b6787fb1 100644 --- a/test/testoutput/gdas.t18z.1bmhs.tm00.15.seven.split.nc +++ b/test/testoutput/gdas.t18z.1bmhs.tm00.15.seven.split.nc @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f364b21dd122812ed241dcede95334de54c1a8fff1e4138d54f151130725f091 +oid sha256:cb80523052bef9bcd45f1c33823e013de8ad20c49f6db9d7e3606fef262ef0c1 size 69271 diff --git a/test/testoutput/gdas.t18z.1bmhs.tm00.filtering.nc b/test/testoutput/gdas.t18z.1bmhs.tm00.filtering.nc index 95f97609d..23cbb369e 100644 --- a/test/testoutput/gdas.t18z.1bmhs.tm00.filtering.nc +++ b/test/testoutput/gdas.t18z.1bmhs.tm00.filtering.nc @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1eaf775b5e422bd54eea30bf5feb71f7e2300a4e96644845de5aa24981274598 -size 103767 +oid sha256:dd17602a8ce81bf471912b8b60cf8e59c2afccd78369de6302c39154f6c6b76d +size 103769 diff --git a/test/testoutput/gdas.t18z.1bmhs.tm00.nc b/test/testoutput/gdas.t18z.1bmhs.tm00.nc index fff1e0211..db596cd3a 100644 --- a/test/testoutput/gdas.t18z.1bmhs.tm00.nc +++ b/test/testoutput/gdas.t18z.1bmhs.tm00.nc @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:295a7ec26f8239efb1ac8b1d69c2ffe6747f949cbf0ed1ddb9b4a9608954962c -size 799162 +oid sha256:8ca1088d8d7f6185c3313486cf2924a1f9f0aa25a8a733fb23ece7112eca0b60 +size 798784 diff --git a/test/testoutput/gdas.t18z.satwnd_avhrr.tm00.nc b/test/testoutput/gdas.t18z.satwnd_avhrr.tm00.nc index 1a024a65b..2ba5ef90c 100644 --- a/test/testoutput/gdas.t18z.satwnd_avhrr.tm00.nc +++ b/test/testoutput/gdas.t18z.satwnd_avhrr.tm00.nc @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ebbba1795a7e0dcc7955c6e05033395424fed771650d7fd0757b6377c5809fcc -size 303710 +oid sha256:5bcdae0bb6086ffc572a9fcca8c1d709d40e4780cc07b55e6bf85547dc87c97d +size 300447 diff --git a/tools/bufr/print_queries.cpp b/tools/bufr/print_queries.cpp index 3765cb787..0dd25d675 100644 --- a/tools/bufr/print_queries.cpp +++ b/tools/bufr/print_queries.cpp @@ -68,12 +68,11 @@ std::vector getQueries(int fileUnit, { static const int SubsetLen = 9; - size_t msgNum = 0; - int iddate; int bufrLoc; int il, im; // throw away char current_subset[9]; + bool subsetFound = false; std::vector queryData; @@ -86,12 +85,18 @@ std::vector getQueries(int fileUnit, status_f(fileUnit, &bufrLoc, &il, &im); dataProvider.updateData(bufrLoc); - msgNum++; if (msg_subset == subset) { - queryData = Ingester::bufr::SubsetTable(dataProvider).allQueryData(); - break; + while (ireadsb_f(fileUnit) == 0) + { + status_f(fileUnit, &bufrLoc, &il, &im); + dataProvider.updateData(bufrLoc); + queryData = Ingester::bufr::SubsetTable(dataProvider).allQueryData(); + subsetFound = true; + } } + + if (subsetFound) break; } return queryData; @@ -120,6 +125,54 @@ std::string dimStyledStr(int dims) return ostr.str(); } +std::string typeStyledStr(const Ingester::bufr::TypeInfo& info) +{ + std::string typeStr; + + if (info.isString()) + { + typeStr = "string"; + } + else if (info.isInteger()) + { + if (info.isSigned()) + { + if (info.is64Bit()) + { + typeStr = "int64 "; + } + else + { + typeStr = "int "; + } + } + else + { + if (info.is64Bit()) + { + typeStr = "uint64"; + } + else + { + typeStr = "uint "; + } + } + } + else + { + if (info.is64Bit()) + { + typeStr = "double"; + } + else + { + typeStr = "float "; + } + } + + return typeStr; +} + void printDimPaths(std::vector> dimPaths) { for (auto& dimPath : dimPaths) @@ -134,6 +187,7 @@ void printQueryList(const std::vector& queries) { std::ostringstream ostr; ostr << dimStyledStr(query.dimIdxs.size()) << " "; + ostr << typeStyledStr(query.typeInfo) << " "; ostr << query.pathComponents[0]; for (size_t pathIdx = 1; pathIdx < query.pathComponents.size(); pathIdx++) { @@ -176,7 +230,7 @@ void printQueries(const std::string& filePath, mtinfo_f(tablePath.c_str(), FileUnitTable1, FileUnitTable2); } - auto dataProvider = Ingester::bufr::DataProvider(); + auto dataProvider = Ingester::bufr::DataProvider(FileUnit); if (!subset.empty()) { auto queries = getQueries(FileUnit, subset.c_str(), dataProvider);