Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bugfix cleanup rng omp #2404

Open
wants to merge 3 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion include/lbann/utils/options.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ namespace lbann {
#define LBANN_OPTION_MODEL "model"
#define LBANN_OPTION_NUM_EPOCHS "num_epochs"
#define LBANN_OPTION_NUM_IO_THREADS "Num. IO threads"
#define LBANN_OPTION_MAX_IO_RNG_BANKS "Max IO RNG banks"
#define LBANN_OPTION_OPTIMIZER "optimizer"
#define LBANN_OPTION_PROCS_PER_TRAINER "Processes per trainer"
#define LBANN_OPTION_PROTOTEXT "prototext"
Expand Down
15 changes: 13 additions & 2 deletions include/lbann/utils/random_number_generators.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,17 +85,28 @@ struct locked_io_rng_ref

/**
* Return a reference to the global LBANN random number generator.
* @note If compiling with OpenMP, this is stored in a threadprivate variable.
*/
rng_gen& get_generator();

/**
* Return a reference to a possibly-faster global LBANN random number generator.
* Compared to get_generator, this should be slightly faster.
* @note If compiling with OpenMP, this is stored in a threadprivate variable.
*/
fast_rng_gen& get_fast_generator();

/**
* Return a reference to the OMP thread local LBANN random number generator.
* @note This is stored in a threadprivate variable.
*/
rng_gen& get_OMP_generator();

/**
* Return a reference to a possibly-faster OMP thread local LBANN random number
* generator. Compared to get_OMP_generator, this should be slightly faster.
* @note This is stored in a threadprivate variable.
*/
fast_rng_gen& get_OMP_fast_generator();

/**
* Return a reference to a global LBANN random number generator for LTFB.
* @note If compiling with OpenMP, this is stored in a threadprivate variable.
Expand Down
9 changes: 5 additions & 4 deletions src/callbacks/mixup.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@
#include "lbann/proto/proto_common.hpp"
#include "lbann/utils/beta.hpp"
#include "lbann/utils/exception.hpp"
#include "lbann/utils/profiling.hpp"
#include "lbann/utils/protobuf.hpp"
#include "lbann/utils/serialize.hpp"
#include "lbann/utils/profiling.hpp"
#include <algorithm>

#include "lbann/proto/callbacks.pb.h"
Expand Down Expand Up @@ -96,7 +96,6 @@ void mixup::on_forward_prop_end(model* m, Layer* l)
El::Int mbsize = samples.Width();
const El::Int samples_height = samples.Height();
const El::Int labels_height = labels.Height();
auto& gen = get_fast_generator();
beta_distribution<float> dist(m_alpha, m_alpha);

// For now, data must be on the CPU.
Expand All @@ -108,15 +107,17 @@ void mixup::on_forward_prop_end(model* m, Layer* l)
// Decide how to mix the mini-batch.
std::vector<El::Int> shuffled_indices(mbsize);
std::iota(shuffled_indices.begin(), shuffled_indices.end(), 0);
std::shuffle(shuffled_indices.begin(), shuffled_indices.end(), gen);
std::shuffle(shuffled_indices.begin(),
shuffled_indices.end(),
get_fast_generator());

LBANN_OMP_PARALLEL_FOR
for (El::Int i = 0; i < mbsize; ++i) {
const El::Int j = shuffled_indices[i];
if (i == j) {
continue;
}
float lambda = dist(gen);
float lambda = dist(get_OMP_fast_generator());
lambda = std::max(lambda, 1.0f - lambda);
const float lambda_sub = 1.0f - lambda;
const DataType* __restrict__ x1_buf = samples.LockedBuffer(0, i);
Expand Down
11 changes: 3 additions & 8 deletions src/utils/lbann_library.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,9 +257,8 @@ trainer& construct_trainer(lbann_comm* comm,
#endif

// Initialize the general RNGs and the data sequence RNGs
int max_io_rng_banks = arg_parser.get<int>(LBANN_OPTION_MAX_IO_RNG_BANKS);
// Create a set of RNG banks for both training and validation type phases
init_random(random_seed, max_io_rng_banks * 2);
init_random(random_seed, io_threads_per_process * 2);
init_data_seq_random(data_seq_random_seed);
init_ltfb_random(root_random_seed);
global_trainer_->set_random_seeds(root_random_seed,
Expand Down Expand Up @@ -374,21 +373,17 @@ std::unique_ptr<thread_pool> construct_io_thread_pool(lbann_comm* comm,

auto& arg_parser = global_argument_parser();
int req_io_threads = arg_parser.get<int>(LBANN_OPTION_NUM_IO_THREADS);
int max_io_rng_banks = arg_parser.get<int>(LBANN_OPTION_MAX_IO_RNG_BANKS);
// Limit the number of I/O threads to:
// < number of available free cores per process
// < number of RNG banks provisioned
// and at least one
int num_io_threads = std::max(
std::min(max_io_rng_banks, std::min(max_io_threads, req_io_threads)),
1);
int num_io_threads = std::max(std::min(max_io_threads, req_io_threads), 1);

auto io_threads_offset = free_core_offset(comm);

if (comm->am_world_master()) {
std::cout << "\tNum. I/O Threads: " << num_io_threads
<< " (Limited to # Unused Compute Cores [" << max_io_threads
<< "] # of RNG banks [" << max_io_rng_banks
<< "] # of RNG banks [" << (num_io_threads * 2)
<< "] or 1) at offset " << io_threads_offset << std::endl;
}

Expand Down
7 changes: 0 additions & 7 deletions src/utils/options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,13 +187,6 @@ void construct_std_options()
"[STD] Number of threads available to both I/O and "
"initial data transformations for each rank. (Default: 4)",
4);
arg_parser.add_option(
LBANN_OPTION_MAX_IO_RNG_BANKS,
{"--max_io_thread_rngs"},
utils::ENV("LBANN_MAX_IO_RNG_BANKS"),
"[STD] Maximum number of random number generator banks available to "
"both I/O and initial data transformations for each rank. (Default: 128)",
128);
arg_parser.add_option(
LBANN_OPTION_OMP_NUM_THREADS,
{"--omp_num_threads"},
Expand Down
Loading
Loading