Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Refactor LibraryInitializer so it's thread safe. Fixes random sporadical concurrency crashes. #15762

Merged
merged 7 commits into from
Aug 9, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ mxnet_option(USE_OLDCMAKECUDA "Build with old cmake cuda" OFF)
mxnet_option(USE_NCCL "Use NVidia NCCL with CUDA" OFF)
mxnet_option(USE_OPENCV "Build with OpenCV support" ON)
mxnet_option(USE_OPENMP "Build with Openmp support" ON)
mxnet_option(USE_OPENMP_BUNDLED_LLVM "Build with bundled llvm openmp from 3rdparty" OFF)
mxnet_option(USE_CUDNN "Build with cudnn support" ON) # one could set CUDNN_ROOT for search path
mxnet_option(USE_SSE "Build with x86 SSE instruction support" ON IF NOT ARM)
mxnet_option(USE_F16C "Build with x86 F16C instruction support" ON) # autodetects support if ON
Expand Down Expand Up @@ -433,11 +434,11 @@ if(USE_OPENMP)
find_package(OpenMP REQUIRED)
# This should build on Windows, but there's some problem and I don't have a Windows box, so
# could a Windows user please fix?
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/openmp/CMakeLists.txt
AND SYSTEM_ARCHITECTURE STREQUAL "x86_64"
AND NOT MSVC
AND NOT CMAKE_CROSSCOMPILING)

if(USE_OPENMP_BUNDLED_LLVM AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/openmp/CMakeLists.txt
AND SYSTEM_ARCHITECTURE STREQUAL "x86_64"
AND NOT MSVC
AND NOT CMAKE_CROSSCOMPILING)
message("Using bundlded LLVM OpenMP")
# Intel/llvm OpenMP: https://github.com/llvm-mirror/openmp
set(OPENMP_STANDALONE_BUILD TRUE)
set(LIBOMP_ENABLE_SHARED TRUE)
Expand All @@ -451,6 +452,7 @@ if(USE_OPENMP)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
add_definitions(-DMXNET_USE_OPENMP=1)
else()
message("Using platform provided OpenMP")
if(OPENMP_FOUND)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
Expand Down
10 changes: 8 additions & 2 deletions docs/faq/env_var.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ $env:MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0

## Set the Number of Threads

* MXNET_OMP_MAX_THREADS
- Values: Int ```(default=Number of processors / Number of processors * 2 in X86)```
- Maximum number of threads to use in individual operators through OpenMP. If not set, OMP_NUM_THREADS is considered after.
* MXNET_GPU_WORKER_NTHREADS
- Values: Int ```(default=2)```
- The maximum number of threads to use on each GPU. This parameter is used to parallelize the computation within a single GPU card.
Expand All @@ -47,7 +50,7 @@ $env:MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
- The maximum number of concurrent threads that do the memory copy job on each GPU.
* MXNET_CPU_WORKER_NTHREADS
- Values: Int ```(default=1)```
- The maximum number of scheduling threads on CPU. It specifies how many operators can be run in parallel. Note that most CPU operators are parallelized by OpenMP. To change the number of threads used by individual operators, please set `OMP_NUM_THREADS` instead.
- The maximum number of scheduling threads on CPU. It specifies how many operators can be run in parallel. Note that most CPU operators are parallelized by OpenMP. To change the number of threads used by individual operators, please set `MXNET_OMP_MAX_THREADS` instead.
* MXNET_CPU_PRIORITY_NTHREADS
- Values: Int ```(default=4)```
- The number of threads given to prioritized CPU jobs.
Expand All @@ -56,10 +59,13 @@ $env:MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
- The number of threads used for NNPACK. NNPACK package aims to provide high-performance implementations of some layers for multi-core CPUs. Checkout [NNPACK](http://mxnet.io/faq/nnpack.html) to know more about it.
* MXNET_MP_WORKER_NTHREADS
- Values: Int ```(default=1)```
- The number of scheduling threads on CPU given to multiprocess workers. Enlarge this number allows more operators to run in parallel in individual workers but please consider reducing the overall `num_workers` to avoid thread contention (not available on Windows).
- The number of scheduling threads on CPU given to multiprocess workers (after fork). Enlarge this number allows more operators to run in parallel in individual workers but please consider reducing the overall `num_workers` to avoid thread contention (not available on Windows).
* MXNET_MP_OPENCV_NUM_THREADS
- Values: Int ```(default=0)```
- The number of OpenCV execution threads given to multiprocess workers. OpenCV multithreading is disabled if `MXNET_MP_OPENCV_NUM_THREADS` < 1 (default). Enlarge this number may boost the performance of individual workers when executing underlying OpenCV functions but please consider reducing the overall `num_workers` to avoid thread contention (not available on Windows).
* MXNET_GPU_COPY_NTHREADS
- Values:: Int ```(default=2)```
- Number of threads for copying data from CPU to GPU.

## Memory Options

Expand Down
4 changes: 2 additions & 2 deletions src/c_api/c_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,12 @@
#include "mxnet/libinfo.h"
#include "mxnet/imperative.h"
#include "mxnet/lib_api.h"
#include "../initialize.h"
#include "./c_api_common.h"
#include "../operator/custom/custom-inl.h"
#include "../operator/tensor/matrix_op-inl.h"
#include "../operator/tvmop/op_module.h"
#include "../common/utils.h"
#include "../common/library.h"

using namespace mxnet;

Expand Down Expand Up @@ -95,7 +95,7 @@ inline int MXAPIGetFunctionRegInfo(const FunRegType *e,
// Loads library and initializes it
int MXLoadLib(const char *path) {
API_BEGIN();
void *lib = load_lib(path);
void *lib = LibraryInitializer::Get()->lib_load(path);
if (!lib)
LOG(FATAL) << "Unable to load library";

Expand Down
125 changes: 0 additions & 125 deletions src/common/library.cc

This file was deleted.

57 changes: 0 additions & 57 deletions src/common/library.h

This file was deleted.

12 changes: 12 additions & 0 deletions src/common/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,21 @@
#include "../operator/nn/mkldnn/mkldnn_base-inl.h"
#endif

#if defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__)
#include <windows.h>
#else
#include <unistd.h>
#endif


namespace mxnet {
namespace common {

#if defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__)
inline size_t current_process_id() { return ::GetCurrentProcessId(); }
#else
inline size_t current_process_id() { return getpid(); }
#endif
/*!
* \brief IndPtr should be non-negative, in non-decreasing order, start with 0
* and end with value equal with size of indices.
Expand Down
4 changes: 3 additions & 1 deletion src/engine/threaded_engine_perdevice.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include <dmlc/parameter.h>
#include <dmlc/concurrency.h>
#include <dmlc/thread_group.h>
#include "../initialize.h"
#include "./threaded_engine.h"
#include "./thread_pool.h"
#include "../common/lazy_alloc_array.h"
Expand Down Expand Up @@ -76,7 +77,8 @@ class ThreadedEnginePerDevice : public ThreadedEngine {
void Start() override {
if (is_worker_) return;
gpu_worker_nthreads_ = common::GetNumThreadsPerGPU();
cpu_worker_nthreads_ = dmlc::GetEnv("MXNET_CPU_WORKER_NTHREADS", 1);
// MXNET_CPU_WORKER_NTHREADS
cpu_worker_nthreads_ = LibraryInitializer::Get()->cpu_worker_nthreads_;
gpu_copy_nthreads_ = dmlc::GetEnv("MXNET_GPU_COPY_NTHREADS", 2);
// create CPU task
int cpu_priority_nthreads = dmlc::GetEnv("MXNET_CPU_PRIORITY_NTHREADS", 4);
Expand Down
Loading