From d8a147fb7486474a6e9142d7fef7a4fbdb37e9e1 Mon Sep 17 00:00:00 2001 From: "Brian C. Van Essen" Date: Tue, 12 Dec 2023 14:26:13 -0800 Subject: [PATCH 1/3] Cleaning up the way in which random number generators are used between sequential and OMP parallel blocks. Added explicit RNGs for OMP parallel blocks. Fixed the declaration and initialization of OMP RNGs when compiling in deterministic mode (which disables OMP parallel blocks). Refactored how the RNGs are saved to and loaded from checkpoints. Changed the creation of the I/O RNG banks so that there is one bank per I/O thread. Removed the option to explicitly set the number of I/O RNGs. --- include/lbann/utils/options.hpp | 1 - .../lbann/utils/random_number_generators.hpp | 15 +- src/callbacks/mixup.cpp | 9 +- src/utils/lbann_library.cpp | 11 +- src/utils/options.cpp | 7 - src/utils/random.cpp | 185 ++++++++---------- src/utils/random_number_generators.cpp | 75 +++++-- 7 files changed, 163 insertions(+), 140 deletions(-) diff --git a/include/lbann/utils/options.hpp b/include/lbann/utils/options.hpp index 96d5f18be70..f0435f8cc51 100644 --- a/include/lbann/utils/options.hpp +++ b/include/lbann/utils/options.hpp @@ -79,7 +79,6 @@ namespace lbann { #define LBANN_OPTION_MODEL "model" #define LBANN_OPTION_NUM_EPOCHS "num_epochs" #define LBANN_OPTION_NUM_IO_THREADS "Num. IO threads" -#define LBANN_OPTION_MAX_IO_RNG_BANKS "Max IO RNG banks" #define LBANN_OPTION_OPTIMIZER "optimizer" #define LBANN_OPTION_PROCS_PER_TRAINER "Processes per trainer" #define LBANN_OPTION_PROTOTEXT "prototext" diff --git a/include/lbann/utils/random_number_generators.hpp b/include/lbann/utils/random_number_generators.hpp index 17ddbe6c2d8..aa6d4a1ff51 100644 --- a/include/lbann/utils/random_number_generators.hpp +++ b/include/lbann/utils/random_number_generators.hpp @@ -85,17 +85,28 @@ struct locked_io_rng_ref /** * Return a reference to the global LBANN random number generator.
- * @note If compiling with OpenMP, this is stored in a threadprivate variable. */ rng_gen& get_generator(); /** * Return a reference to a possibly-faster global LBANN random number generator. * Compared to get_generator, this should be slightly faster. - * @note If compiling with OpenMP, this is stored in a threadprivate variable. */ fast_rng_gen& get_fast_generator(); +/** + * Return a reference to the OMP thread local LBANN random number generator. + * @note This is stored in a threadprivate variable. + */ +rng_gen& get_OMP_generator(); + +/** + * Return a reference to a possibly-faster OMP thread local LBANN random number + * generator. Compared to get_OMP_generator, this should be slightly faster. + * @note This is stored in a threadprivate variable. + */ +fast_rng_gen& get_OMP_fast_generator(); + /** * Return a reference to a global LBANN random number generator for LTFB. * @note If compiling with OpenMP, this is stored in a threadprivate variable. diff --git a/src/callbacks/mixup.cpp b/src/callbacks/mixup.cpp index 28dd9d8233c..e12fad70c59 100644 --- a/src/callbacks/mixup.cpp +++ b/src/callbacks/mixup.cpp @@ -31,9 +31,9 @@ #include "lbann/proto/proto_common.hpp" #include "lbann/utils/beta.hpp" #include "lbann/utils/exception.hpp" +#include "lbann/utils/profiling.hpp" #include "lbann/utils/protobuf.hpp" #include "lbann/utils/serialize.hpp" -#include "lbann/utils/profiling.hpp" #include #include "lbann/proto/callbacks.pb.h" @@ -96,7 +96,6 @@ void mixup::on_forward_prop_end(model* m, Layer* l) El::Int mbsize = samples.Width(); const El::Int samples_height = samples.Height(); const El::Int labels_height = labels.Height(); - auto& gen = get_fast_generator(); beta_distribution dist(m_alpha, m_alpha); // For now, data must be on the CPU. @@ -108,7 +107,9 @@ void mixup::on_forward_prop_end(model* m, Layer* l) // Decide how to mix the mini-batch.
std::vector shuffled_indices(mbsize); std::iota(shuffled_indices.begin(), shuffled_indices.end(), 0); - std::shuffle(shuffled_indices.begin(), shuffled_indices.end(), gen); + std::shuffle(shuffled_indices.begin(), + shuffled_indices.end(), + get_fast_generator()); LBANN_OMP_PARALLEL_FOR for (El::Int i = 0; i < mbsize; ++i) { @@ -116,7 +117,7 @@ void mixup::on_forward_prop_end(model* m, Layer* l) if (i == j) { continue; } - float lambda = dist(gen); + float lambda = dist(get_OMP_fast_generator()); lambda = std::max(lambda, 1.0f - lambda); const float lambda_sub = 1.0f - lambda; const DataType* __restrict__ x1_buf = samples.LockedBuffer(0, i); diff --git a/src/utils/lbann_library.cpp b/src/utils/lbann_library.cpp index b5ad85e8a08..e33c27674ad 100644 --- a/src/utils/lbann_library.cpp +++ b/src/utils/lbann_library.cpp @@ -257,9 +257,8 @@ trainer& construct_trainer(lbann_comm* comm, #endif // Initialize the general RNGs and the data sequence RNGs - int max_io_rng_banks = arg_parser.get(LBANN_OPTION_MAX_IO_RNG_BANKS); // Create a set of RNG banks for both training and validation type phases - init_random(random_seed, max_io_rng_banks * 2); + init_random(random_seed, io_threads_per_process * 2); init_data_seq_random(data_seq_random_seed); init_ltfb_random(root_random_seed); global_trainer_->set_random_seeds(root_random_seed, @@ -374,21 +373,17 @@ std::unique_ptr construct_io_thread_pool(lbann_comm* comm, auto& arg_parser = global_argument_parser(); int req_io_threads = arg_parser.get(LBANN_OPTION_NUM_IO_THREADS); - int max_io_rng_banks = arg_parser.get(LBANN_OPTION_MAX_IO_RNG_BANKS); // Limit the number of I/O threads to: // < number of available free cores per process - // < number of RNG banks provisioned // and at least one - int num_io_threads = std::max( - std::min(max_io_rng_banks, std::min(max_io_threads, req_io_threads)), - 1); + int num_io_threads = std::max(std::min(max_io_threads, req_io_threads), 1); auto io_threads_offset = free_core_offset(comm); if 
(comm->am_world_master()) { std::cout << "\tNum. I/O Threads: " << num_io_threads << " (Limited to # Unused Compute Cores [" << max_io_threads - << "] # of RNG banks [" << max_io_rng_banks + << "] # of RNG banks [" << (num_io_threads * 2) << "] or 1) at offset " << io_threads_offset << std::endl; } diff --git a/src/utils/options.cpp b/src/utils/options.cpp index b83509f22af..6335f10cc4a 100644 --- a/src/utils/options.cpp +++ b/src/utils/options.cpp @@ -187,13 +187,6 @@ void construct_std_options() "[STD] Number of threads available to both I/O and " "initial data transformations for each rank. (Default: 4)", 4); - arg_parser.add_option( - LBANN_OPTION_MAX_IO_RNG_BANKS, - {"--max_io_thread_rngs"}, - utils::ENV("LBANN_MAX_IO_RNG_BANKS"), - "[STD] Maximum number of random number generator banks available to " - "both I/O and initial data transformations for each rank. (Default: 128)", - 128); arg_parser.add_option( LBANN_OPTION_OMP_NUM_THREADS, {"--omp_num_threads"}, diff --git a/src/utils/random.cpp b/src/utils/random.cpp index f2494dd9d7e..80542499fa3 100644 --- a/src/utils/random.cpp +++ b/src/utils/random.cpp @@ -38,6 +38,45 @@ namespace lbann { +namespace { +void save_rng_state(std::string rng_name, rng_gen& gen) +{ + std::ofstream rng(rng_name); + if (!rng) { + LBANN_ERROR("Failed to open ", rng_name); + } + rng << gen; + rng.close(); +} +void save_rng_state(std::string rng_name, fast_rng_gen& gen) +{ + std::ofstream rng(rng_name); + if (!rng) { + LBANN_ERROR("Failed to open ", rng_name); + } + rng << gen; + rng.close(); +} + +void load_rng_state(std::string rng_name, rng_gen& gen) +{ + std::ifstream rng_seq(rng_name); + if (!rng_seq) { + LBANN_ERROR("Failed to open ", rng_name); + } + rng_seq >> gen; +} + +void load_rng_state(std::string rng_name, fast_rng_gen& gen) +{ + std::ifstream rng_seq(rng_name); + if (!rng_seq) { + LBANN_ERROR("Failed to open ", rng_name); + } + rng_seq >> gen; +} +} // namespace + bool save_rng_to_checkpoint(persist& p, lbann_comm* 
comm, bool is_distributed) { std::string dirname = std::string(p.m_checkpoint_dir) + "/rng_state"; @@ -59,12 +98,7 @@ bool save_rng_to_checkpoint(persist& p, lbann_comm* comm, bool is_distributed) if (comm == nullptr || comm->am_trainer_master() || is_distributed) { /// @todo - Note that the RNG with thread local data is not correct rng_name = dirname + "/rng_seq_generator"; - std::ofstream rng_seq(rng_name); - if (!rng_seq) { - LBANN_ERROR("Failed to open ", rng_name); - } - rng_seq << get_data_seq_generator(); - rng_seq.close(); + save_rng_state(rng_name, get_data_seq_generator()); rng_name = dirname + "/EL_generator"; std::ofstream rng_EL(rng_name); @@ -90,6 +124,8 @@ bool save_rng_to_checkpoint(persist& p, lbann_comm* comm, bool is_distributed) } locked_io_rng_ref io_rng = set_io_generators_local_index(i); + // save_rng_state(rng_name); + // save_rng_state(rng_name); rng_io << get_io_generator(); rng_fast_io << get_fast_io_generator(); @@ -97,60 +133,33 @@ bool save_rng_to_checkpoint(persist& p, lbann_comm* comm, bool is_distributed) rng_fast_io.close(); } -#ifdef _OPENMP + rng_name = dirname + "/rng_generator_" + rank_in_trainer; + save_rng_state(rng_name, get_generator()); + + rng_name = dirname + "/rng_fast_generator_" + rank_in_trainer; + save_rng_state(rng_name, get_fast_generator()); + + rng_name = dirname + "/rng_ltfb_generator_" + rank_in_trainer; + save_rng_state(rng_name, get_ltfb_generator()); + +#if not defined(LBANN_DETERMINISTIC) && defined(_OPENMP) + // #ifdef _OPENMP #pragma omp parallel private(rng_name) { - rng_name = dirname + "/rng_generator_" + rank_in_trainer + "_" + + rng_name = dirname + "/rng_OMP_generator_" + rank_in_trainer + "_" + std::to_string(omp_get_thread_num()); - std::ofstream rng(rng_name); - if (!rng) { - LBANN_ERROR("Failed to open ", rng_name); - } - rng << get_generator(); - rng.close(); + save_rng_state(rng_name, get_OMP_generator()); - rng_name = dirname + "/rng_fast_generator_" + rank_in_trainer + "_" + + rng_name = 
dirname + "/rng_OMP_fast_generator_" + rank_in_trainer + "_" + std::to_string(omp_get_thread_num()); - std::ofstream rng_fast(rng_name); - if (!rng_fast) { - LBANN_ERROR("Failed to open ", rng_name); - } - rng_fast << get_fast_generator(); - rng_fast.close(); - - rng_name = dirname + "/rng_ltfb_generator_" + rank_in_trainer + "_" + - std::to_string(omp_get_thread_num()); - std::ofstream rng_ltfb(rng_name); - if (!rng_ltfb) { - LBANN_ERROR("Failed to open ", rng_name); - } - rng_ltfb << get_ltfb_generator(); - rng_ltfb.close(); + save_rng_state(rng_name, get_OMP_fast_generator()); } -#else - rng_name = dirname + "/rng_generator_" + rank_in_trainer; - std::ofstream rng(rng_name); - if (!rng) { - LBANN_ERROR("Failed to open ", rng_name); - } - rng << get_generator(); - rng.close(); - - rng_name = dirname + "/rng_fast_generator_" + rank_in_trainer; - std::ofstream rng_fast(rng_name); - if (!rng_fast) { - LBANN_ERROR("Failed to open ", rng_name); - } - rng_fast << get_fast_generator(); - rng_fast.close(); +#else // not defined(LBANN_DETERMINISTIC) && defined(_OPENMP) + rng_name = dirname + "/rng_OMP_generator_" + rank_in_trainer; + save_rng_state(rng_name, get_OMP_generator()); - rng_name = dirname + "/rng_ltfb_generator_" + rank_in_trainer; - std::ofstream rng_ltfb(rng_name); - if (!rng_ltfb) { - LBANN_ERROR("Failed to open ", rng_name); - } - rng_ltfb << get_ltfb_generator(); - rng_ltfb.close(); + rng_name = dirname + "/rng_OMP_fast_generator_" + rank_in_trainer; + save_rng_state(rng_name, get_OMP_fast_generator()); #endif return true; @@ -174,11 +183,7 @@ bool load_rng_from_checkpoint(persist& p, const lbann_comm* comm) /// @todo - Note that the RNG with thread local data is not correct rng_name = dirname + "/rng_seq_generator"; - std::ifstream rng_seq(rng_name); - if (!rng_seq) { - LBANN_ERROR("Failed to open ", rng_name); - } - rng_seq >> get_data_seq_generator(); + load_rng_state(rng_name, get_data_seq_generator()); rng_name = dirname + "/EL_generator"; 
std::ifstream rng_EL(rng_name); @@ -214,54 +219,32 @@ bool load_rng_from_checkpoint(persist& p, const lbann_comm* comm) rng_fast_io >> get_fast_io_generator(); } -#ifdef _OPENMP + rng_name = dirname + "/rng_generator_" + rank_in_trainer; + load_rng_state(rng_name, get_generator()); + + rng_name = dirname + "/rng_fast_generator_" + rank_in_trainer; + load_rng_state(rng_name, get_fast_generator()); + + rng_name = dirname + "/rng_ltfb_generator_" + rank_in_trainer; + load_rng_state(rng_name, get_ltfb_generator()); + +#if not defined(LBANN_DETERMINISTIC) && defined(_OPENMP) #pragma omp parallel private(rng_name) { - rng_name = dirname + "/rng_generator_" + rank_in_trainer + "_" + + rng_name = dirname + "/rng_OMP_generator_" + rank_in_trainer + "_" + std::to_string(omp_get_thread_num()); - std::ifstream rng(rng_name); - if (!rng) { - LBANN_ERROR("Failed to open ", rng_name); - } - rng >> get_generator(); + load_rng_state(rng_name, get_OMP_generator()); - rng_name = dirname + "/rng_fast_generator_" + rank_in_trainer + "_" + + rng_name = dirname + "/rng_OMP_fast_generator_" + rank_in_trainer + "_" + std::to_string(omp_get_thread_num()); - std::ifstream rng_fast(rng_name); - if (!rng_fast) { - LBANN_ERROR("Failed to open ", rng_name); - } - rng_fast >> get_fast_generator(); - - rng_name = dirname + "/rng_ltfb_generator_" + rank_in_trainer + "_" + - std::to_string(omp_get_thread_num()); - std::ifstream rng_ltfb(rng_name); - if (!rng_ltfb) { - LBANN_ERROR("Failed to open ", rng_name); - } - rng_ltfb >> get_ltfb_generator(); - } -#else - rng_name = dirname + "/rng_generator_" + rank_in_trainer; - std::ifstream rng(rng_name); - if (!rng) { - LBANN_ERROR("Failed to open ", rng_name); + load_rng_state(rng_name, get_OMP_fast_generator()); } - rng >> get_generator(); +#else // not defined(LBANN_DETERMINISTIC) && defined(_OPENMP) + rng_name = dirname + "/rng_OMP_generator_" + rank_in_trainer; + load_rng_state(rng_name, get_OMP_generator()); - rng_name = dirname + 
"/rng_fast_generator_" + rank_in_trainer; - std::ifstream rng_fast(rng_name); - if (!rng_fast) { - LBANN_ERROR("Failed to open ", rng_name); - } - rng_fast >> get_fast_generator(); - - rng_name = dirname + "/rng_ltfb_generator_" + rank_in_trainer; - std::ifstream rng_ltfb(rng_name); - if (!rng_ltfb) { - LBANN_ERROR("Failed to open ", rng_name); - } - rng_ltfb >> get_ltfb_generator(); + rng_name = dirname + "/rng_OMP_fast_generator_" + rank_in_trainer; + load_rng_state(rng_name, get_OMP_fast_generator()); #endif return true; } @@ -476,7 +459,7 @@ void gaussian_fill_parallel(El::AbstractDistMatrix& mat, const size_t size = local_vals.Height() * local_vals.Width(); LBANN_OMP_PARALLEL_FOR_ARGS(firstprivate(dist)) for (size_t i = 0; i < size; ++i) { - buffer[i] = dist(get_generator()); + buffer[i] = dist(get_OMP_generator()); } } else { @@ -487,7 +470,7 @@ void gaussian_fill_parallel(El::AbstractDistMatrix& mat, LBANN_OMP_PARALLEL_FOR_ARGS(collapse(2) firstprivate(dist)) for (size_t j = 0; j < width; ++j) { for (size_t i = 0; i < height; ++i) { - buffer[i + j * ldim] = dist(get_generator()); + buffer[i + j * ldim] = dist(get_OMP_generator()); } } } diff --git a/src/utils/random_number_generators.cpp b/src/utils/random_number_generators.cpp index 52860385653..c9bc8c88266 100644 --- a/src/utils/random_number_generators.cpp +++ b/src/utils/random_number_generators.cpp @@ -32,6 +32,7 @@ #include namespace { +#if not defined(LBANN_DETERMINISTIC) && defined(_OPENMP) #ifdef __ICC lbann::rng_gen generator; #pragma omp threadprivate(generator) @@ -39,25 +40,42 @@ lbann::rng_gen generator; lbann::fast_rng_gen fast_generator; #pragma omp threadprivate(fast_generator) -lbann::fast_rng_gen ltfb_generator; -#pragma omp threadprivate(ltfb_generator) +bool OMP_generator_inited = false; +#pragma omp threadprivate(OMP_generator_inited) + +bool OMP_fast_generator_inited = false; +#pragma omp threadprivate(OMP_fast_generator_inited) #else // Random number generator, file-visible only. 
// Defined like this to work around a GCC problem with threadprivate objects: // https://stackoverflow.com/questions/23552077/how-to-define-a-object-or-struct-as-threadprivate-in-openmp/ -extern lbann::rng_gen generator; -#pragma omp threadprivate(generator) -lbann::rng_gen generator; +extern lbann::rng_gen OMP_generator; +#pragma omp threadprivate(OMP_generator) +lbann::rng_gen OMP_generator; -extern lbann::fast_rng_gen fast_generator; -#pragma omp threadprivate(fast_generator) -lbann::fast_rng_gen fast_generator; +extern lbann::fast_rng_gen OMP_fast_generator; +#pragma omp threadprivate(OMP_fast_generator) +lbann::fast_rng_gen OMP_fast_generator; -extern lbann::fast_rng_gen ltfb_generator; -#pragma omp threadprivate(ltfb_generator) -lbann::fast_rng_gen ltfb_generator; +extern bool OMP_generator_inited; +#pragma omp threadprivate(OMP_generator_inited) +bool OMP_generator_inited = false; + +extern bool OMP_fast_generator_inited; +#pragma omp threadprivate(OMP_fast_generator_inited) +bool OMP_fast_generator_inited = false; +#endif +#else // not defined(LBANN_DETERMINISTIC) && defined(_OPENMP) +lbann::rng_gen OMP_generator; +lbann::fast_rng_gen OMP_fast_generator; +bool OMP_generator_inited = false; +bool OMP_fast_generator_inited = false; #endif +lbann::rng_gen generator; +lbann::fast_rng_gen fast_generator; +lbann::fast_rng_gen ltfb_generator; + bool generator_inited = false; bool fast_generator_inited = false; bool ltfb_generator_inited = false; @@ -99,6 +117,22 @@ fast_rng_gen& get_ltfb_generator() return ::ltfb_generator; } +rng_gen& get_OMP_generator() +{ + if (!::OMP_generator_inited) { + LBANN_ERROR("OMP RNG seed not set"); + } + return ::OMP_generator; +} + +fast_rng_gen& get_OMP_fast_generator() +{ + if (!::OMP_fast_generator_inited) { + LBANN_ERROR("OMP Fast RNG seed not set"); + } + return ::OMP_fast_generator; +} + rng_gen& get_data_seq_generator() { if (!::data_seq_generator_inited) { @@ -165,6 +199,8 @@ void init_random(int seed, int num_io_RNGs, 
lbann_comm* comm) { generator_inited = true; fast_generator_inited = true; + OMP_generator_inited = true; + OMP_fast_generator_inited = true; // Use different seed on each rank in trainer if (seed == -1) { @@ -178,20 +214,25 @@ void init_random(int seed, int num_io_RNGs, lbann_comm* comm) seed = hash_combine(seed, El::mpi::Rank(El::mpi::COMM_WORLD)); } + get_generator().seed(seed); + get_fast_generator().seed(hash_combine(seed, 41263)); // 4321th prime + // Seed every OpenMP thread, if present. // Note: Threadprivate OMP variables don't work with dynamic threads. -#ifdef _OPENMP +#if not defined(LBANN_DETERMINISTIC) && defined(_OPENMP) #pragma omp parallel { const int thread = omp_get_thread_num(); const int thread_seed = hash_combine(seed, thread); - get_generator().seed(thread_seed); - get_fast_generator().seed( + OMP_generator_inited = true; + OMP_fast_generator_inited = true; + get_OMP_generator().seed(thread_seed); + get_OMP_fast_generator().seed( hash_combine(thread_seed, 132241)); // 12345th prime } -#else - get_generator().seed(seed); - get_fast_generator().seed(hash_combine(seed, 41263)); // 4321th prime +#else // not defined(LBANN_DETERMINISTIC) && defined(_OPENMP) + get_OMP_generator().seed(seed); + get_OMP_fast_generator().seed(hash_combine(seed, 41263)); // 4321th prime #endif // Set Elemental's RNG seed From 3c1c6b8be29538794db5e95d77ac7d6eb0101a48 Mon Sep 17 00:00:00 2001 From: "Brian C. Van Essen" Date: Tue, 12 Dec 2023 14:49:25 -0800 Subject: [PATCH 2/3] Additional cleanup of RNG checkpoint code. 
--- src/utils/random.cpp | 61 ++++++++++---------------------------------- 1 file changed, 14 insertions(+), 47 deletions(-) diff --git a/src/utils/random.cpp b/src/utils/random.cpp index 80542499fa3..53aeae835a4 100644 --- a/src/utils/random.cpp +++ b/src/utils/random.cpp @@ -101,36 +101,17 @@ bool save_rng_to_checkpoint(persist& p, lbann_comm* comm, bool is_distributed) save_rng_state(rng_name, get_data_seq_generator()); rng_name = dirname + "/EL_generator"; - std::ofstream rng_EL(rng_name); - if (!rng_EL) { - LBANN_ERROR("Failed to open ", rng_name); - } - rng_EL << El::Generator(); - rng_EL.close(); + save_rng_state(rng_name, El::Generator()); } for (int i = 0; i < get_num_io_generators(); i++) { - rng_name = dirname + "/rng_io_generator_" + rank_in_trainer + "_t" + - std::to_string(i); - std::ofstream rng_io(rng_name); - if (!rng_io) { - LBANN_ERROR("Failed to open ", rng_name); - } - rng_name = dirname + "/rng_fast_io_generator_" + rank_in_trainer + "_t" + - std::to_string(i); - std::ofstream rng_fast_io(rng_name); - if (!rng_fast_io) { - LBANN_ERROR("Failed to open ", rng_name); - } - locked_io_rng_ref io_rng = set_io_generators_local_index(i); - // save_rng_state(rng_name); - // save_rng_state(rng_name); - rng_io << get_io_generator(); - rng_fast_io << get_fast_io_generator(); - - rng_io.close(); - rng_fast_io.close(); + save_rng_state(dirname + "/rng_io_generator_" + rank_in_trainer + "_t" + + std::to_string(i), + get_io_generator()); + save_rng_state(dirname + "/rng_fast_io_generator_" + rank_in_trainer + + "_t" + std::to_string(i), + get_fast_io_generator()); } rng_name = dirname + "/rng_generator_" + rank_in_trainer; @@ -143,7 +124,6 @@ bool save_rng_to_checkpoint(persist& p, lbann_comm* comm, bool is_distributed) save_rng_state(rng_name, get_ltfb_generator()); #if not defined(LBANN_DETERMINISTIC) && defined(_OPENMP) - // #ifdef _OPENMP #pragma omp parallel private(rng_name) { rng_name = dirname + "/rng_OMP_generator_" + rank_in_trainer + "_" + @@ 
-186,11 +166,7 @@ bool load_rng_from_checkpoint(persist& p, const lbann_comm* comm) load_rng_state(rng_name, get_data_seq_generator()); rng_name = dirname + "/EL_generator"; - std::ifstream rng_EL(rng_name); - if (!rng_EL) { - LBANN_ERROR("Failed to open ", rng_name); - } - rng_EL >> El::Generator(); + load_rng_state(rng_name, El::Generator()); std::string rank_in_trainer; if (comm == nullptr) { @@ -201,22 +177,13 @@ bool load_rng_from_checkpoint(persist& p, const lbann_comm* comm) } for (int i = 0; i < get_num_io_generators(); i++) { - rng_name = dirname + "/rng_io_generator_" + rank_in_trainer + "_t" + - std::to_string(i); - std::ifstream rng_io(rng_name); - if (!rng_io) { - LBANN_ERROR("Failed to open ", rng_name); - } - rng_name = dirname + "/rng_fast_io_generator_" + rank_in_trainer + "_t" + - std::to_string(i); - std::ifstream rng_fast_io(rng_name); - if (!rng_fast_io) { - LBANN_ERROR("Failed to open ", rng_name); - } - locked_io_rng_ref io_rng = set_io_generators_local_index(i); - rng_io >> get_io_generator(); - rng_fast_io >> get_fast_io_generator(); + load_rng_state(dirname + "/rng_io_generator_" + rank_in_trainer + "_t" + + std::to_string(i), + get_io_generator()); + load_rng_state(dirname + "/rng_fast_io_generator_" + rank_in_trainer + + "_t" + std::to_string(i), + get_fast_io_generator()); } rng_name = dirname + "/rng_generator_" + rank_in_trainer; From 2382348128e4b7c8bfa21105ca57beb09b9cef99 Mon Sep 17 00:00:00 2001 From: "Brian C. Van Essen" Date: Wed, 21 Feb 2024 16:36:42 -0800 Subject: [PATCH 3/3] Fixed the definition of the generators for ICC compilers. 
--- src/utils/random_number_generators.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/utils/random_number_generators.cpp b/src/utils/random_number_generators.cpp index c9bc8c88266..8624ba9dde8 100644 --- a/src/utils/random_number_generators.cpp +++ b/src/utils/random_number_generators.cpp @@ -34,18 +34,18 @@ namespace { #if not defined(LBANN_DETERMINISTIC) && defined(_OPENMP) #ifdef __ICC -lbann::rng_gen generator; -#pragma omp threadprivate(generator) +lbann::rng_gen OMP_generator; +#pragma omp threadprivate(OMP_generator) -lbann::fast_rng_gen fast_generator; -#pragma omp threadprivate(fast_generator) +lbann::fast_rng_gen OMP_fast_generator; +#pragma omp threadprivate(OMP_fast_generator) bool OMP_generator_inited = false; #pragma omp threadprivate(OMP_generator_inited) bool OMP_fast_generator_inited = false; #pragma omp threadprivate(OMP_fast_generator_inited) -#else +#else // ! __ICC // Random number generator, file-visible only. // Defined like this to work around a GCC problem with threadprivate objects: // https://stackoverflow.com/questions/23552077/how-to-define-a-object-or-struct-as-threadprivate-in-openmp/