Skip to content
This repository has been archived by the owner on Aug 11, 2020. It is now read-only.

Commit

Permalink
Merge pull request #356 from szha/omp
Browse files Browse the repository at this point in the history
disable omp parallel for only when __CUDACC__
  • Loading branch information
szha committed Sep 24, 2018
2 parents b32d191 + 927e07d commit d579d4b
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
2 changes: 1 addition & 1 deletion mshadow/packet-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,7 @@ inline void MapPacketPlan(Tensor<cpu, dim, DType> _dst,
Tensor<cpu, 2, DType> dst = _dst.FlatTo2D();
const index_t xlen = packet::LowerAlign<DType, Arch>(dst.size(1));
const size_t packetSize = packet::Packet<DType, Arch>::size;
- #if (MSHADOW_USE_CUDA == 0)
+ #ifndef __CUDACC__
#pragma omp parallel for
#endif
for (openmp_index_t y = 0; y < dst.size(0); ++y) {
Expand Down
6 changes: 3 additions & 3 deletions mshadow/tensor_cpu-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ inline void MapPlan(TRValue<R, cpu, dim, DType> *dst,
const expr::Plan<E, DType> &plan) {
Shape<2> shape = expr::ShapeCheck<dim, R>::Check(dst->self()).FlatTo2D();
expr::Plan<R, DType> dplan = expr::MakePlan(dst->self());
- #if (MSHADOW_USE_CUDA == 0)
+ #ifndef __CUDACC__
#pragma omp parallel for
#endif
// temp remove openmp, as default setting throttles CPU
Expand Down Expand Up @@ -215,7 +215,7 @@ inline void MapReduceKeepLowest(TRValue<R, cpu, 1, DType> *dst,
// execution
expr::Plan<R, DType> dplan = MakePlan(dst->self());
expr::Plan<E, DType> splan = MakePlan(exp.self());
- #if (MSHADOW_USE_CUDA == 0)
+ #ifndef __CUDACC__
#pragma omp parallel for
#endif
for (openmp_index_t x = 0; x < eshape[1]; ++x) {
Expand Down Expand Up @@ -248,7 +248,7 @@ inline void MapReduceKeepHighDim(TRValue<R, cpu, 1, DType> *dst,
// execution
expr::Plan<R, DType> dplan = MakePlan(dst->self());
expr::Plan<E, DType> splan = MakePlan(exp.self());
- #if (MSHADOW_USE_CUDA == 0)
+ #ifndef __CUDACC__
#pragma omp parallel for
#endif
for (openmp_index_t c = 0; c < pshape[1]; ++c) {
Expand Down

0 comments on commit d579d4b

Please sign in to comment.