Add a RuntimeOption to set inter and intra op threadpool sizes
VivekPanyam committed Jan 26, 2021
1 parent 9199206 commit 7387010
Showing 3 changed files with 44 additions and 1 deletion.
20 changes: 19 additions & 1 deletion source/neuropod/backends/tensorflow/tf_backend.cc
@@ -93,7 +93,7 @@ void check_tf_status(const tensorflow::Status &status)
}

// Get TF session options given Neuropod RuntimeOptions
-tensorflow::SessionOptions get_tf_opts(const RuntimeOptions & /*unused*/)
+tensorflow::SessionOptions get_tf_opts(const RuntimeOptions &runtime_opts)
{
tensorflow::SessionOptions opts;

@@ -103,6 +103,24 @@ tensorflow::SessionOptions get_tf_opts(const RuntimeOptions & /*unused*/)
opts.config.set_allow_soft_placement(true);
opts.config.set_log_device_placement(false);

// Set intra and inter op parallelism
// See https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/config.proto
if (runtime_opts.intra_op_parallelism_threads != 0)
{
opts.config.set_intra_op_parallelism_threads(runtime_opts.intra_op_parallelism_threads);
}

if (runtime_opts.inter_op_parallelism_threads == 1)
{
// Only use the caller thread
opts.config.set_inter_op_parallelism_threads(-1);
}
else if (runtime_opts.inter_op_parallelism_threads > 1)
{
// The number in runtime_opts includes the caller thread
opts.config.set_inter_op_parallelism_threads(runtime_opts.inter_op_parallelism_threads - 1);
}

// Note: we can't use GPUOptions::visible_device_list as it is a per process setting
//
// From: https://github.com/tensorflow/tensorflow/issues/18861#issuecomment-385610497
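The TF mapping above treats the option as a total count that includes the caller thread: a value of 1 maps to TF's "run everything on the caller's thread" mode (a negative inter_op_parallelism_threads), and larger values hand TF a pool one smaller than requested. A minimal caller-side sketch, assuming the public neuropod::Neuropod constructor and a hypothetical model path:

#include "neuropod/neuropod.hh"

// Ask for an intra-op pool of 8 and 4 inter-op threads in total.
// With the mapping above, TF gets intra_op_parallelism_threads = 8 and
// an inter-op pool of 3; the 4th inter-op "thread" is the caller itself.
neuropod::RuntimeOptions opts;
opts.intra_op_parallelism_threads = 8;
opts.inter_op_parallelism_threads = 4;
neuropod::Neuropod model("/path/to/my_model.neuropod", opts); // hypothetical path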
12 changes: 12 additions & 0 deletions source/neuropod/backends/torchscript/torch_backend.cc
@@ -225,6 +225,18 @@ std::mutex loaded_op_mutex;
TorchNeuropodBackend::TorchNeuropodBackend(const std::string &neuropod_path, const RuntimeOptions &options)
: NeuropodBackendWithDefaultAllocator<TorchNeuropodTensor>(neuropod_path, options)
{
// Set intra and inter op parallelism
// See https://pytorch.org/docs/stable/notes/cpu_threading_torchscript_inference.html#runtime-api
if (options.inter_op_parallelism_threads != 0)
{
at::set_num_interop_threads(options.inter_op_parallelism_threads);
}

if (options.intra_op_parallelism_threads != 0)
{
at::set_num_threads(options.intra_op_parallelism_threads);
}

if (options.load_model_at_construction)
{
load_model();
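For the TorchScript backend, the values are passed through to ATen unchanged. Per the linked PyTorch doc, these settings are process-global, and at::set_num_interop_threads can only be called once, before any inter-op work starts, which is the "first usage wins" caveat noted in options.hh below. A small verification sketch using the matching ATen getters (assumed available from the same ATen/Parallel.h header as the setters used above):

#include <ATen/Parallel.h>
#include <iostream>

// After constructing a backend with non-zero thread options, the
// process-global ATen settings should reflect them.
std::cout << "torch intra-op threads: " << at::get_num_threads() << "\n"
          << "torch inter-op threads: " << at::get_num_interop_threads() << "\n";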
13 changes: 13 additions & 0 deletions source/neuropod/options.hh
@@ -75,6 +75,19 @@ struct RuntimeOptions

// Whether or not to disable shape and type checking when running inference
bool disable_shape_and_type_checking = false;

// Set the intra and inter op parallelism for the underlying framework
// Within a given process, only the first usage of this configuration takes effect
// See https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/config.proto
// and https://pytorch.org/docs/stable/notes/cpu_threading_torchscript_inference.html#runtime-api
// for more details
// For true per-model control of these values, use out-of-process execution (see above)
// A value of 0 means system defined
uint32_t intra_op_parallelism_threads = 0;

// A value of 0 means system defined
// Note: this count includes the caller thread
uint32_t inter_op_parallelism_threads = 0;
};

} // namespace neuropod
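Since the first in-process use of these options wins for both frameworks, per-model control requires running models out-of-process, as the comment above says. A sketch of that pattern, assuming the existing use_ope flag on RuntimeOptions (the out-of-process execution option the "see above" refers to) and hypothetical model paths:

// Two models with independent threadpool sizes in one process.
neuropod::RuntimeOptions small;
small.inter_op_parallelism_threads = 2;

neuropod::RuntimeOptions large;
large.use_ope = true; // out-of-process execution, so this applies per model
large.inter_op_parallelism_threads = 16;

neuropod::Neuropod a("/path/to/model_a.neuropod", small);
neuropod::Neuropod b("/path/to/model_b.neuropod", large);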
