From 0ed8991c232b73b914f1ed15480f54702d8e4924 Mon Sep 17 00:00:00 2001 From: Carl Yang Date: Tue, 7 Aug 2018 23:38:04 +0000 Subject: [PATCH 01/10] add fallback for gpu topology detection using CUDA 9.2 --- src/kvstore/comm_tree.h | 16 +++++----- src/kvstore/gpu_topology.h | 62 ++++++++++++++++++++++++++++++++++---- 2 files changed, 64 insertions(+), 14 deletions(-) diff --git a/src/kvstore/comm_tree.h b/src/kvstore/comm_tree.h index 1ebfcdc8010d..050958e9a63f 100644 --- a/src/kvstore/comm_tree.h +++ b/src/kvstore/comm_tree.h @@ -77,9 +77,6 @@ class CommDeviceTree : public CommDevice { // BroadcastRowSparse InitMergeBuffer(devs_); InitMergeBufferTree(); - if (dmlc::GetEnv("MXNET_ENABLE_GPU_P2P", 1)) { - EnableP2P(); - } } } @@ -328,7 +325,7 @@ class CommDeviceTree : public CommDevice { } private: - void EnableP2P() { + void EnableP2P(std::vector* p2p) { #if MXNET_USE_CUDA std::vector gpus; for (const auto& d : devs_) { @@ -338,7 +335,8 @@ class CommDeviceTree : public CommDevice { } int n = static_cast(gpus.size()); int enabled = 0; - std::vector p2p(n*n); + p2p->clear(); + p2p->resize(n*n, 0); for (int i = 0; i < n; ++i) { cudaSetDevice(gpus[i]); for (int j = 0; j < n; j++) { @@ -348,7 +346,7 @@ class CommDeviceTree : public CommDevice { cudaError_t e = cudaDeviceEnablePeerAccess(gpus[j], 0); if (e == cudaSuccess || e == cudaErrorPeerAccessAlreadyEnabled) { ++enabled; - p2p[i*n+j] = 1; + (*p2p)[i*n+j] = 1; } } } @@ -362,7 +360,7 @@ class CommDeviceTree : public CommDevice { std::string access(n, '.'); for (int i = 0; i < n; ++i) { for (int j = 0; j < n; ++j) { - access[j] = p2p[i*n+j] ? 'v' : '.'; + access[j] = (*p2p)[i*n+j] ? 
'v' : '.'; } LOG(WARNING) << access; } @@ -373,7 +371,9 @@ class CommDeviceTree : public CommDevice { void QueryTopology() { #if MXNET_USE_CUDA std::vector link_matrix(devs_.size()*devs_.size()); - GetP2PWeight(devs_, &link_matrix); + std::vector p2p_matrix(devs_.size()*devs_.size()); + EnableP2P(&p2p_matrix); + GetP2PWeight(devs_, p2p_matrix, &link_matrix); if (backtrack_) LOG(INFO) << "Using Backtracking to generate trees"; else diff --git a/src/kvstore/gpu_topology.h b/src/kvstore/gpu_topology.h index a8801499c3be..d73d94d647fe 100644 --- a/src/kvstore/gpu_topology.h +++ b/src/kvstore/gpu_topology.h @@ -123,6 +123,9 @@ inline bool IsConnected(const std::vector& matrix, int num_gpus) { /** * \brief Generate adjacency matrix with row/col numbering from 0, 1, ..., n_gpu * \param devs is a vector of GPU contexts + * \param p2p_matrix is adjacency matrix of P2P connections where + * 0: no P2P connection + * 1: P2P connection * \param matrix is adjacency matrix of link topology graph * where edge weight represents relative performance of NVIDIA GPUs * 0: Self-connection @@ -131,7 +134,9 @@ inline bool IsConnected(const std::vector& matrix, int num_gpus) { * 3: 2 NVLink connections */ template -inline void GetP2PWeight(const std::vector& devs, std::vector* matrix) { +inline void GetP2PWeight(const std::vector& devs, + const std::vector& p2p_matrix, + std::vector* matrix) { int num_gpus = devs.size(); int count = 0; std::vector zero_dev_id(num_gpus, -1); @@ -161,11 +166,54 @@ inline void GetP2PWeight(const std::vector& devs, std::vector* matri } } - // Check that all GPUs have at least 1 NVLink connection - int max_value = 0; - for (unsigned int i = 0; i < max.size(); ++i) { - if (max[i] > max_value) - max_value = max[i]; + // Check that all P2P connections are detected by GetP2PAttribute + // If yes, then continue as before + // If not, then fall back to using p2p_matrix (from EnableP2P) + // + // We have observed that with CUDA 9.0 p3.16xlarge: + // + // 0 2 2 3 
3 1 1 1 . v v v v . . . + // 2 0 3 2 1 3 1 1 v . v v . v . . + // 2 3 0 3 1 1 2 1 v v . v . . v . + // 3 2 3 0 1 1 1 2 v v v . . . . v + // 3 1 1 1 0 2 2 3 v . . . . v v v + // 1 3 1 1 2 0 3 2 . v . . v . v v + // 1 1 2 1 2 3 0 3 . . v . v v . v + // 1 1 1 2 3 2 3 0 . . . v v v v . + // + // matrix p2p_matrix + // + // Here, they are correctly detected, because the 2s and 3s correspond to + // links that have P2P connections between them. However for CUDA 9.2 p3.16xlarge: + // + // 0 2 2 1 1 1 1 1 . v v v v . . . + // 2 0 1 2 1 1 1 1 v . v v . v . . + // 2 1 0 1 1 1 2 1 v v . v . . v . + // 1 2 1 0 1 1 1 2 v v v . . . . v + // 1 1 1 1 0 2 2 1 v . . . . v v v + // 1 1 1 1 2 0 1 2 . v . . v . v v + // 1 1 2 1 2 1 0 1 . . v . v v . v + // 1 1 1 2 1 2 1 0 . . . v v v v . + // + // matrix p2p_matrix + // + // The fastest connections (3 - double NVLink) are not recognized as being any + // different from (1 - non-P2P PCI-E). This is why we fallback to p2p_matrix. + bool matrix_correct = true; + for (unsigned i = 0; i < p2p_matrix.size(); ++i) { + if (p2p_matrix[i] > 0 && (*matrix)[i] == 1) { + matrix_correct = false; + break; + } + } + + if (!matrix_correct) { + for (unsigned i = 0; i < p2p_matrix.size(); ++i) { + if (p2p_matrix[i] > 0) + (*matrix)[i] = 2; + else + (*matrix)[i] = 1; + } } // If all GPUs are connected by NVLink, then we can use NVLink only @@ -188,6 +236,8 @@ inline void GetP2PWeight(const std::vector& devs, std::vector* matri matrix_value = (matrix_value == 1) ? 
1./num_gpus : matrix_value; } } + if (kLogTree) + PrintMatrix("Weight", *matrix, num_gpus, num_gpus); #else LOG(WARNING) << "GPU required for link topology"; From dad855f74174e5b71705500dbe691c6aa61856cc Mon Sep 17 00:00:00 2001 From: Carl Yang Date: Tue, 7 Aug 2018 23:38:04 +0000 Subject: [PATCH 02/10] add fallback for gpu topology detection using CUDA 9.2 --- src/kvstore/comm_tree.h | 16 +++++----- src/kvstore/gpu_topology.h | 62 ++++++++++++++++++++++++++++++++++---- 2 files changed, 64 insertions(+), 14 deletions(-) diff --git a/src/kvstore/comm_tree.h b/src/kvstore/comm_tree.h index b62228cd2885..11d99c021917 100644 --- a/src/kvstore/comm_tree.h +++ b/src/kvstore/comm_tree.h @@ -77,9 +77,6 @@ class CommDeviceTree : public CommDevice { // BroadcastRowSparse InitMergeBuffer(devs_); InitMergeBufferTree(); - if (dmlc::GetEnv("MXNET_ENABLE_GPU_P2P", 1)) { - EnableP2P(); - } } } @@ -328,7 +325,7 @@ class CommDeviceTree : public CommDevice { } private: - void EnableP2P() { + void EnableP2P(std::vector* p2p) { #if MXNET_USE_CUDA std::vector gpus; for (const auto& d : devs_) { @@ -338,7 +335,8 @@ class CommDeviceTree : public CommDevice { } int n = static_cast(gpus.size()); int enabled = 0; - std::vector p2p(n*n); + p2p->clear(); + p2p->resize(n*n, 0); for (int i = 0; i < n; ++i) { mxnet::common::cuda::DeviceStore device_store(gpus[i]); for (int j = 0; j < n; j++) { @@ -348,7 +346,7 @@ class CommDeviceTree : public CommDevice { cudaError_t e = cudaDeviceEnablePeerAccess(gpus[j], 0); if (e == cudaSuccess || e == cudaErrorPeerAccessAlreadyEnabled) { ++enabled; - p2p[i*n+j] = 1; + (*p2p)[i*n+j] = 1; } } } @@ -362,7 +360,7 @@ class CommDeviceTree : public CommDevice { std::string access(n, '.'); for (int i = 0; i < n; ++i) { for (int j = 0; j < n; ++j) { - access[j] = p2p[i*n+j] ? 'v' : '.'; + access[j] = (*p2p)[i*n+j] ? 
'v' : '.'; } LOG(WARNING) << access; } @@ -373,7 +371,9 @@ class CommDeviceTree : public CommDevice { void QueryTopology() { #if MXNET_USE_CUDA std::vector link_matrix(devs_.size()*devs_.size()); - GetP2PWeight(devs_, &link_matrix); + std::vector p2p_matrix(devs_.size()*devs_.size()); + EnableP2P(&p2p_matrix); + GetP2PWeight(devs_, p2p_matrix, &link_matrix); if (backtrack_) LOG(INFO) << "Using Backtracking to generate trees"; else diff --git a/src/kvstore/gpu_topology.h b/src/kvstore/gpu_topology.h index 92bcf1a5d263..00a2525d0e9c 100644 --- a/src/kvstore/gpu_topology.h +++ b/src/kvstore/gpu_topology.h @@ -123,6 +123,9 @@ inline bool IsConnected(const std::vector& matrix, int num_gpus) { /** * \brief Generate adjacency matrix with row/col numbering from 0, 1, ..., n_gpu * \param devs is a vector of GPU contexts + * \param p2p_matrix is adjacency matrix of P2P connections where + * 0: no P2P connection + * 1: P2P connection * \param matrix is adjacency matrix of link topology graph * where edge weight represents relative performance of NVIDIA GPUs * 0: Self-connection @@ -131,7 +134,9 @@ inline bool IsConnected(const std::vector& matrix, int num_gpus) { * 3: 2 NVLink connections */ template -inline void GetP2PWeight(const std::vector& devs, std::vector* matrix) { +inline void GetP2PWeight(const std::vector& devs, + const std::vector& p2p_matrix, + std::vector* matrix) { int num_gpus = devs.size(); int count = 0; std::vector zero_dev_id(num_gpus, -1); @@ -161,11 +166,54 @@ inline void GetP2PWeight(const std::vector& devs, std::vector* matri } } - // Check that all GPUs have at least 1 NVLink connection - int max_value = 0; - for (unsigned int i = 0; i < max.size(); ++i) { - if (max[i] > max_value) - max_value = max[i]; + // Check that all P2P connections are detected by GetP2PAttribute + // If yes, then continue as before + // If not, then fall back to using p2p_matrix (from EnableP2P) + // + // We have observed that with CUDA 9.0 p3.16xlarge: + // + // 0 2 2 3 
3 1 1 1 . v v v v . . . + // 2 0 3 2 1 3 1 1 v . v v . v . . + // 2 3 0 3 1 1 2 1 v v . v . . v . + // 3 2 3 0 1 1 1 2 v v v . . . . v + // 3 1 1 1 0 2 2 3 v . . . . v v v + // 1 3 1 1 2 0 3 2 . v . . v . v v + // 1 1 2 1 2 3 0 3 . . v . v v . v + // 1 1 1 2 3 2 3 0 . . . v v v v . + // + // matrix p2p_matrix + // + // Here, they are correctly detected, because the 2s and 3s correspond to + // links that have P2P connections between them. However for CUDA 9.2 p3.16xlarge: + // + // 0 2 2 1 1 1 1 1 . v v v v . . . + // 2 0 1 2 1 1 1 1 v . v v . v . . + // 2 1 0 1 1 1 2 1 v v . v . . v . + // 1 2 1 0 1 1 1 2 v v v . . . . v + // 1 1 1 1 0 2 2 1 v . . . . v v v + // 1 1 1 1 2 0 1 2 . v . . v . v v + // 1 1 2 1 2 1 0 1 . . v . v v . v + // 1 1 1 2 1 2 1 0 . . . v v v v . + // + // matrix p2p_matrix + // + // The fastest connections (3 - double NVLink) are not recognized as being any + // different from (1 - non-P2P PCI-E). This is why we fallback to p2p_matrix. + bool matrix_correct = true; + for (unsigned i = 0; i < p2p_matrix.size(); ++i) { + if (p2p_matrix[i] > 0 && (*matrix)[i] == 1) { + matrix_correct = false; + break; + } + } + + if (!matrix_correct) { + for (unsigned i = 0; i < p2p_matrix.size(); ++i) { + if (p2p_matrix[i] > 0) + (*matrix)[i] = 2; + else + (*matrix)[i] = 1; + } } // If all GPUs are connected by NVLink, then we can use NVLink only @@ -188,6 +236,8 @@ inline void GetP2PWeight(const std::vector& devs, std::vector* matri matrix_value = (matrix_value == 1) ? 
1./num_gpus : matrix_value; } } + if (kLogTree) + PrintMatrix("Weight", *matrix, num_gpus, num_gpus); #else LOG(WARNING) << "GPU required for link topology"; From ad2baee92acd0bb94413c37127e7c8b14644a623 Mon Sep 17 00:00:00 2001 From: Carl Yang Date: Mon, 7 Jan 2019 10:06:16 -0800 Subject: [PATCH 03/10] add log --- src/kvstore/gpu_topology.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/kvstore/gpu_topology.h b/src/kvstore/gpu_topology.h index 00a2525d0e9c..699e30f7ca99 100644 --- a/src/kvstore/gpu_topology.h +++ b/src/kvstore/gpu_topology.h @@ -198,6 +198,11 @@ inline void GetP2PWeight(const std::vector& devs, // matrix p2p_matrix // // The fastest connections (3 - double NVLink) are not recognized as being any + if (kLogTree) { + PrintMatrix("matrix", *matrix, num_gpus, num_gpus); + PrintMatrix("p2p_matrix", p2p_matrix, num_gpus, num_gpus); + } + // different from (1 - non-P2P PCI-E). This is why we fallback to p2p_matrix. bool matrix_correct = true; for (unsigned i = 0; i < p2p_matrix.size(); ++i) { From 497efac539aee94a41b4277389d130c047faae9a Mon Sep 17 00:00:00 2001 From: Carl Yang Date: Mon, 7 Jan 2019 10:07:51 -0800 Subject: [PATCH 04/10] update 3rdparty to master --- 3rdparty/dmlc-core | 2 +- 3rdparty/mkldnn | 2 +- 3rdparty/mshadow | 2 +- 3rdparty/tvm | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/3rdparty/dmlc-core b/3rdparty/dmlc-core index 0a0e8addf92e..649be18a8c55 160000 --- a/3rdparty/dmlc-core +++ b/3rdparty/dmlc-core @@ -1 +1 @@ -Subproject commit 0a0e8addf92e1287fd7a25c6314016b8c0138dee +Subproject commit 649be18a8c55c48517861d67158a45dec54992ee diff --git a/3rdparty/mkldnn b/3rdparty/mkldnn index a7c5f53832ac..0e7ca738866d 160000 --- a/3rdparty/mkldnn +++ b/3rdparty/mkldnn @@ -1 +1 @@ -Subproject commit a7c5f53832acabade6e5086e72c960adedb3c38a +Subproject commit 0e7ca738866d22cc700aa33b8de120b938f910d0 diff --git a/3rdparty/mshadow b/3rdparty/mshadow index 6dc04f7c729c..463c0dffe3ea 160000 --- a/3rdparty/mshadow 
+++ b/3rdparty/mshadow @@ -1 +1 @@ -Subproject commit 6dc04f7c729cd5c6c6210d5d4d2026a26ce0bfbf +Subproject commit 463c0dffe3eae8c39caf7989c85b7244823df27e diff --git a/3rdparty/tvm b/3rdparty/tvm index 0f053c82a747..426e3bb0a8e8 160000 --- a/3rdparty/tvm +++ b/3rdparty/tvm @@ -1 +1 @@ -Subproject commit 0f053c82a747b4dcdf49570ec87c17e0067b7439 +Subproject commit 426e3bb0a8e86bb48a25b950fd8ef965ca5d370b From df2262e05cc19f1ec7d4bca88bf65c5113f183b2 Mon Sep 17 00:00:00 2001 From: Carl Yang Date: Tue, 7 Aug 2018 23:38:04 +0000 Subject: [PATCH 05/10] add fallback for gpu topology detection using CUDA 9.2 --- src/kvstore/comm_tree.h | 16 +++++----- src/kvstore/gpu_topology.h | 62 ++++++++++++++++++++++++++++++++++---- 2 files changed, 64 insertions(+), 14 deletions(-) diff --git a/src/kvstore/comm_tree.h b/src/kvstore/comm_tree.h index b62228cd2885..11d99c021917 100644 --- a/src/kvstore/comm_tree.h +++ b/src/kvstore/comm_tree.h @@ -77,9 +77,6 @@ class CommDeviceTree : public CommDevice { // BroadcastRowSparse InitMergeBuffer(devs_); InitMergeBufferTree(); - if (dmlc::GetEnv("MXNET_ENABLE_GPU_P2P", 1)) { - EnableP2P(); - } } } @@ -328,7 +325,7 @@ class CommDeviceTree : public CommDevice { } private: - void EnableP2P() { + void EnableP2P(std::vector* p2p) { #if MXNET_USE_CUDA std::vector gpus; for (const auto& d : devs_) { @@ -338,7 +335,8 @@ class CommDeviceTree : public CommDevice { } int n = static_cast(gpus.size()); int enabled = 0; - std::vector p2p(n*n); + p2p->clear(); + p2p->resize(n*n, 0); for (int i = 0; i < n; ++i) { mxnet::common::cuda::DeviceStore device_store(gpus[i]); for (int j = 0; j < n; j++) { @@ -348,7 +346,7 @@ class CommDeviceTree : public CommDevice { cudaError_t e = cudaDeviceEnablePeerAccess(gpus[j], 0); if (e == cudaSuccess || e == cudaErrorPeerAccessAlreadyEnabled) { ++enabled; - p2p[i*n+j] = 1; + (*p2p)[i*n+j] = 1; } } } @@ -362,7 +360,7 @@ class CommDeviceTree : public CommDevice { std::string access(n, '.'); for (int i = 0; i < n; ++i) { for 
(int j = 0; j < n; ++j) { - access[j] = p2p[i*n+j] ? 'v' : '.'; + access[j] = (*p2p)[i*n+j] ? 'v' : '.'; } LOG(WARNING) << access; } @@ -373,7 +371,9 @@ class CommDeviceTree : public CommDevice { void QueryTopology() { #if MXNET_USE_CUDA std::vector link_matrix(devs_.size()*devs_.size()); - GetP2PWeight(devs_, &link_matrix); + std::vector p2p_matrix(devs_.size()*devs_.size()); + EnableP2P(&p2p_matrix); + GetP2PWeight(devs_, p2p_matrix, &link_matrix); if (backtrack_) LOG(INFO) << "Using Backtracking to generate trees"; else diff --git a/src/kvstore/gpu_topology.h b/src/kvstore/gpu_topology.h index 92bcf1a5d263..00a2525d0e9c 100644 --- a/src/kvstore/gpu_topology.h +++ b/src/kvstore/gpu_topology.h @@ -123,6 +123,9 @@ inline bool IsConnected(const std::vector& matrix, int num_gpus) { /** * \brief Generate adjacency matrix with row/col numbering from 0, 1, ..., n_gpu * \param devs is a vector of GPU contexts + * \param p2p_matrix is adjacency matrix of P2P connections where + * 0: no P2P connection + * 1: P2P connection * \param matrix is adjacency matrix of link topology graph * where edge weight represents relative performance of NVIDIA GPUs * 0: Self-connection @@ -131,7 +134,9 @@ inline bool IsConnected(const std::vector& matrix, int num_gpus) { * 3: 2 NVLink connections */ template -inline void GetP2PWeight(const std::vector& devs, std::vector* matrix) { +inline void GetP2PWeight(const std::vector& devs, + const std::vector& p2p_matrix, + std::vector* matrix) { int num_gpus = devs.size(); int count = 0; std::vector zero_dev_id(num_gpus, -1); @@ -161,11 +166,54 @@ inline void GetP2PWeight(const std::vector& devs, std::vector* matri } } - // Check that all GPUs have at least 1 NVLink connection - int max_value = 0; - for (unsigned int i = 0; i < max.size(); ++i) { - if (max[i] > max_value) - max_value = max[i]; + // Check that all P2P connections are detected by GetP2PAttribute + // If yes, then continue as before + // If not, then fall back to using p2p_matrix 
(from EnableP2P) + // + // We have observed that with CUDA 9.0 p3.16xlarge: + // + // 0 2 2 3 3 1 1 1 . v v v v . . . + // 2 0 3 2 1 3 1 1 v . v v . v . . + // 2 3 0 3 1 1 2 1 v v . v . . v . + // 3 2 3 0 1 1 1 2 v v v . . . . v + // 3 1 1 1 0 2 2 3 v . . . . v v v + // 1 3 1 1 2 0 3 2 . v . . v . v v + // 1 1 2 1 2 3 0 3 . . v . v v . v + // 1 1 1 2 3 2 3 0 . . . v v v v . + // + // matrix p2p_matrix + // + // Here, they are correctly detected, because the 2s and 3s correspond to + // links that have P2P connections between them. However for CUDA 9.2 p3.16xlarge: + // + // 0 2 2 1 1 1 1 1 . v v v v . . . + // 2 0 1 2 1 1 1 1 v . v v . v . . + // 2 1 0 1 1 1 2 1 v v . v . . v . + // 1 2 1 0 1 1 1 2 v v v . . . . v + // 1 1 1 1 0 2 2 1 v . . . . v v v + // 1 1 1 1 2 0 1 2 . v . . v . v v + // 1 1 2 1 2 1 0 1 . . v . v v . v + // 1 1 1 2 1 2 1 0 . . . v v v v . + // + // matrix p2p_matrix + // + // The fastest connections (3 - double NVLink) are not recognized as being any + // different from (1 - non-P2P PCI-E). This is why we fallback to p2p_matrix. + bool matrix_correct = true; + for (unsigned i = 0; i < p2p_matrix.size(); ++i) { + if (p2p_matrix[i] > 0 && (*matrix)[i] == 1) { + matrix_correct = false; + break; + } + } + + if (!matrix_correct) { + for (unsigned i = 0; i < p2p_matrix.size(); ++i) { + if (p2p_matrix[i] > 0) + (*matrix)[i] = 2; + else + (*matrix)[i] = 1; + } } // If all GPUs are connected by NVLink, then we can use NVLink only @@ -188,6 +236,8 @@ inline void GetP2PWeight(const std::vector& devs, std::vector* matri matrix_value = (matrix_value == 1) ? 
1./num_gpus : matrix_value; } } + if (kLogTree) + PrintMatrix("Weight", *matrix, num_gpus, num_gpus); #else LOG(WARNING) << "GPU required for link topology"; From 4ba1a83bff6c3cd70ccf43ca6aaba0e4b662af17 Mon Sep 17 00:00:00 2001 From: Carl Yang Date: Mon, 7 Jan 2019 10:06:16 -0800 Subject: [PATCH 06/10] add log --- src/kvstore/gpu_topology.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/kvstore/gpu_topology.h b/src/kvstore/gpu_topology.h index 00a2525d0e9c..699e30f7ca99 100644 --- a/src/kvstore/gpu_topology.h +++ b/src/kvstore/gpu_topology.h @@ -198,6 +198,11 @@ inline void GetP2PWeight(const std::vector& devs, // matrix p2p_matrix // // The fastest connections (3 - double NVLink) are not recognized as being any + if (kLogTree) { + PrintMatrix("matrix", *matrix, num_gpus, num_gpus); + PrintMatrix("p2p_matrix", p2p_matrix, num_gpus, num_gpus); + } + // different from (1 - non-P2P PCI-E). This is why we fallback to p2p_matrix. bool matrix_correct = true; for (unsigned i = 0; i < p2p_matrix.size(); ++i) { From 5135309b111e0445e6c22f3adaa07c3306d264da Mon Sep 17 00:00:00 2001 From: Carl Yang Date: Mon, 7 Jan 2019 10:07:51 -0800 Subject: [PATCH 07/10] update 3rdparty to master --- 3rdparty/dmlc-core | 2 +- 3rdparty/mkldnn | 2 +- 3rdparty/mshadow | 2 +- 3rdparty/tvm | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/3rdparty/dmlc-core b/3rdparty/dmlc-core index 0a0e8addf92e..649be18a8c55 160000 --- a/3rdparty/dmlc-core +++ b/3rdparty/dmlc-core @@ -1 +1 @@ -Subproject commit 0a0e8addf92e1287fd7a25c6314016b8c0138dee +Subproject commit 649be18a8c55c48517861d67158a45dec54992ee diff --git a/3rdparty/mkldnn b/3rdparty/mkldnn index a7c5f53832ac..0e7ca738866d 160000 --- a/3rdparty/mkldnn +++ b/3rdparty/mkldnn @@ -1 +1 @@ -Subproject commit a7c5f53832acabade6e5086e72c960adedb3c38a +Subproject commit 0e7ca738866d22cc700aa33b8de120b938f910d0 diff --git a/3rdparty/mshadow b/3rdparty/mshadow index 6dc04f7c729c..463c0dffe3ea 160000 --- a/3rdparty/mshadow 
+++ b/3rdparty/mshadow @@ -1 +1 @@ -Subproject commit 6dc04f7c729cd5c6c6210d5d4d2026a26ce0bfbf +Subproject commit 463c0dffe3eae8c39caf7989c85b7244823df27e diff --git a/3rdparty/tvm b/3rdparty/tvm index 0f053c82a747..426e3bb0a8e8 160000 --- a/3rdparty/tvm +++ b/3rdparty/tvm @@ -1 +1 @@ -Subproject commit 0f053c82a747b4dcdf49570ec87c17e0067b7439 +Subproject commit 426e3bb0a8e86bb48a25b950fd8ef965ca5d370b From 9ae5723bbe7b29eee9739466211866ec6a576e17 Mon Sep 17 00:00:00 2001 From: Carl Yang Date: Fri, 11 Jan 2019 19:53:14 +0000 Subject: [PATCH 08/10] bring 3rdparty packages to upstream/master --- 3rdparty/dmlc-core | 2 +- 3rdparty/mkldnn | 2 +- 3rdparty/mshadow | 2 +- 3rdparty/tvm | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/3rdparty/dmlc-core b/3rdparty/dmlc-core index 649be18a8c55..0a0e8addf92e 160000 --- a/3rdparty/dmlc-core +++ b/3rdparty/dmlc-core @@ -1 +1 @@ -Subproject commit 649be18a8c55c48517861d67158a45dec54992ee +Subproject commit 0a0e8addf92e1287fd7a25c6314016b8c0138dee diff --git a/3rdparty/mkldnn b/3rdparty/mkldnn index 0e7ca738866d..a7c5f53832ac 160000 --- a/3rdparty/mkldnn +++ b/3rdparty/mkldnn @@ -1 +1 @@ -Subproject commit 0e7ca738866d22cc700aa33b8de120b938f910d0 +Subproject commit a7c5f53832acabade6e5086e72c960adedb3c38a diff --git a/3rdparty/mshadow b/3rdparty/mshadow index 463c0dffe3ea..6dc04f7c729c 160000 --- a/3rdparty/mshadow +++ b/3rdparty/mshadow @@ -1 +1 @@ -Subproject commit 463c0dffe3eae8c39caf7989c85b7244823df27e +Subproject commit 6dc04f7c729cd5c6c6210d5d4d2026a26ce0bfbf diff --git a/3rdparty/tvm b/3rdparty/tvm index 426e3bb0a8e8..0f053c82a747 160000 --- a/3rdparty/tvm +++ b/3rdparty/tvm @@ -1 +1 @@ -Subproject commit 426e3bb0a8e86bb48a25b950fd8ef965ca5d370b +Subproject commit 0f053c82a747b4dcdf49570ec87c17e0067b7439 From 50d6630acddd34ad8181c8ee28b7d94eaef61406 Mon Sep 17 00:00:00 2001 From: Carl Yang Date: Fri, 11 Jan 2019 20:17:06 +0000 Subject: [PATCH 09/10] rebase to master --- 3rdparty/dmlc-core | 2 +- 
3rdparty/mkldnn | 2 +- 3rdparty/mshadow | 2 +- 3rdparty/tvm | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/3rdparty/dmlc-core b/3rdparty/dmlc-core index 649be18a8c55..0a0e8addf92e 160000 --- a/3rdparty/dmlc-core +++ b/3rdparty/dmlc-core @@ -1 +1 @@ -Subproject commit 649be18a8c55c48517861d67158a45dec54992ee +Subproject commit 0a0e8addf92e1287fd7a25c6314016b8c0138dee diff --git a/3rdparty/mkldnn b/3rdparty/mkldnn index 0e7ca738866d..a7c5f53832ac 160000 --- a/3rdparty/mkldnn +++ b/3rdparty/mkldnn @@ -1 +1 @@ -Subproject commit 0e7ca738866d22cc700aa33b8de120b938f910d0 +Subproject commit a7c5f53832acabade6e5086e72c960adedb3c38a diff --git a/3rdparty/mshadow b/3rdparty/mshadow index 463c0dffe3ea..6dc04f7c729c 160000 --- a/3rdparty/mshadow +++ b/3rdparty/mshadow @@ -1 +1 @@ -Subproject commit 463c0dffe3eae8c39caf7989c85b7244823df27e +Subproject commit 6dc04f7c729cd5c6c6210d5d4d2026a26ce0bfbf diff --git a/3rdparty/tvm b/3rdparty/tvm index 426e3bb0a8e8..0f053c82a747 160000 --- a/3rdparty/tvm +++ b/3rdparty/tvm @@ -1 +1 @@ -Subproject commit 426e3bb0a8e86bb48a25b950fd8ef965ca5d370b +Subproject commit 0f053c82a747b4dcdf49570ec87c17e0067b7439 From 1ca7aab71dd528e054b63f35e572a9e4fd5e4839 Mon Sep 17 00:00:00 2001 From: Carl Yang Date: Fri, 11 Jan 2019 13:21:18 -0800 Subject: [PATCH 10/10] Update gpu_topology.h --- src/kvstore/gpu_topology.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/kvstore/gpu_topology.h b/src/kvstore/gpu_topology.h index 699e30f7ca99..777fb47f9945 100644 --- a/src/kvstore/gpu_topology.h +++ b/src/kvstore/gpu_topology.h @@ -213,6 +213,8 @@ inline void GetP2PWeight(const std::vector& devs, } if (!matrix_correct) { + LOG(WARNING) << "cudaDeviceGetP2PAttribute incorrect. " + << "Falling back to cudaDeviceEnablePeerAccess for topology detection"; for (unsigned i = 0; i < p2p_matrix.size(); ++i) { if (p2p_matrix[i] > 0) (*matrix)[i] = 2;