Skip to content

Commit 90cb6e7

Browse files
committed
Added a check in the HDF5 data reader to verify that the metadata for
each field actually matches the dimensions of the data fields. Added a helper function for conduit that computes the product of a data array's elements.
1 parent d7c5780 commit 90cb6e7

File tree

3 files changed

+64
-1
lines changed

3 files changed

+64
-1
lines changed

Diff for: include/lbann/utils/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ set_full_path(THIS_DIR_HEADERS
3232
cloneable.hpp
3333
commify.hpp
3434
compiler_control.hpp
35+
conduit_extensions.hpp
3536
dataset.hpp
3637
describable.hpp
3738
description.hpp

Diff for: include/lbann/utils/conduit_extensions.hpp

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
////////////////////////////////////////////////////////////////////////////////
2+
// Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC.
3+
// Produced at the Lawrence Livermore National Laboratory.
4+
// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
5+
// the CONTRIBUTORS file. <[email protected]>
6+
//
7+
// LLNL-CODE-697807.
8+
// All rights reserved.
9+
//
10+
// This file is part of LBANN: Livermore Big Artificial Neural Network
11+
// Toolkit. For details, see http://software.llnl.gov/LBANN or
12+
// https://github.com/LBANN.
13+
//
14+
// Licensed under the Apache License, Version 2.0 (the "License"); you
15+
// may not use this file except in compliance with the License. You may
16+
// obtain a copy of the License at:
17+
//
18+
// http://www.apache.org/licenses/LICENSE-2.0
19+
//
20+
// Unless required by applicable law or agreed to in writing, software
21+
// distributed under the License is distributed on an "AS IS" BASIS,
22+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
23+
// implied. See the License for the specific language governing
24+
// permissions and limitations under the License.
25+
////////////////////////////////////////////////////////////////////////////////
26+
27+
#include "conduit/conduit_data_array.hpp"
28+
29+
namespace conduit {
30+
31+
template <typename T>
32+
T
33+
data_array_prod(DataArray<T> a)
34+
{
35+
T res = 1;
36+
for(index_t i = 0; i < a.number_of_elements(); i++)
37+
{
38+
const T &val = a.element(i);
39+
res *= val;
40+
}
41+
42+
return res;
43+
}
44+
45+
} // conduit

Diff for: src/data_ingestion/readers/data_reader_HDF5.cpp

+18-1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
//
2626
/////////////////////////////////////////////////////////////////////////////////
2727
#include "conduit/conduit_relay_mpi.hpp"
28+
#include "lbann/utils/conduit_extensions.hpp"
2829

2930
#include "lbann/data_ingestion/readers/data_reader_HDF5.hpp"
3031
#include "lbann/data_ingestion/readers/data_reader_sample_list_impl.hpp"
@@ -357,7 +358,23 @@ void hdf5_data_reader::load_sample(conduit::Node& node,
357358
original_path,
358359
node[new_pathname]);
359360
}
360-
361+
// Check that the dimensions of each node match its metadata
362+
if (metadata.has_child(HDF5_METADATA_KEY_DIMS)) {
363+
int n_elts = node[pathname].dtype().number_of_elements();
364+
conduit::int64_array data_array_dims = metadata[HDF5_METADATA_KEY_DIMS].value();
365+
auto expected_n_elts = data_array_prod(data_array_dims);
366+
367+
if (n_elts != expected_n_elts) {
368+
LBANN_WARNING("Ingesting sample field ",
369+
pathname,
370+
" for sample ",
371+
sample_name,
372+
" where the dimensions in the metadata don't match the actual field: ",
373+
expected_n_elts,
374+
" != ",
375+
n_elts);
376+
}
377+
}
361378
// check to see if there are integer types left in the sample and warn the
362379
// user
363380
auto dtype = node[new_pathname].dtype();

0 commit comments

Comments
 (0)