Merge pull request #356 from szha/omp

Disable `#pragma omp parallel for` only when `__CUDACC__` is defined, instead of whenever `MSHADOW_USE_CUDA` is nonzero.
dmlc · Sep 24, 2018 · d579d4b
2 parents b32d191 + 927e07d
commit d579d4b
Show file tree

Hide file tree

Showing 2 changed files with 4 additions and 4 deletions.
diff --git a/mshadow/packet-inl.h b/mshadow/packet-inl.h
@@ -396,7 +396,7 @@ inline void MapPacketPlan(Tensor<cpu, dim, DType> _dst,
   Tensor<cpu, 2, DType> dst = _dst.FlatTo2D();
   const index_t xlen = packet::LowerAlign<DType, Arch>(dst.size(1));
   const size_t packetSize = packet::Packet<DType, Arch>::size;
-#if (MSHADOW_USE_CUDA == 0)
+#ifndef __CUDACC__
   #pragma omp parallel for
 #endif
   for (openmp_index_t y = 0; y < dst.size(0); ++y) {

diff --git a/mshadow/tensor_cpu-inl.h b/mshadow/tensor_cpu-inl.h
@@ -146,7 +146,7 @@ inline void MapPlan(TRValue<R, cpu, dim, DType> *dst,
                     const expr::Plan<E, DType> &plan) {
   Shape<2> shape = expr::ShapeCheck<dim, R>::Check(dst->self()).FlatTo2D();
   expr::Plan<R, DType> dplan = expr::MakePlan(dst->self());
-#if (MSHADOW_USE_CUDA == 0)
+#ifndef __CUDACC__
   #pragma omp parallel for
 #endif
   // temp remove openmp, as default setting throttles CPU
@@ -215,7 +215,7 @@ inline void MapReduceKeepLowest(TRValue<R, cpu, 1, DType> *dst,
   // execution
   expr::Plan<R, DType> dplan = MakePlan(dst->self());
   expr::Plan<E, DType> splan = MakePlan(exp.self());
-#if (MSHADOW_USE_CUDA == 0)
+#ifndef __CUDACC__
   #pragma omp parallel for
 #endif
   for (openmp_index_t x = 0; x < eshape[1]; ++x) {
@@ -248,7 +248,7 @@ inline void MapReduceKeepHighDim(TRValue<R, cpu, 1, DType> *dst,
   // execution
   expr::Plan<R, DType> dplan = MakePlan(dst->self());
   expr::Plan<E, DType> splan = MakePlan(exp.self());
-#if (MSHADOW_USE_CUDA == 0)
+#ifndef __CUDACC__
   #pragma omp parallel for
 #endif
   for (openmp_index_t c = 0; c < pshape[1]; ++c) {