Note: Google Test filter = OMP_TUNING.* [==========] Running 8 tests from 1 test case. [----------] Global test environment set-up. [----------] 8 tests from OMP_TUNING [ RUN ] OMP_TUNING.ShowAllTunedOps mxnet::op::PopulateFullIdxRspKernel mxnet::op::mxnet_op::set_to_int<0> mxnet::op::mshadow_op::smooth_l1_gradient mxnet::op::mshadow_op::smooth_l1_loss mxnet::op::mshadow_op::eq mxnet::op::mshadow_op::ne mxnet::op::mshadow_op::le mxnet::op::mshadow_op::lt mxnet::op::mshadow_op::hypot_grad_right mxnet::op::mshadow_op::hypot_grad_left mxnet::op::mshadow_op::hypot mxnet::op::mshadow_op::arctanh_grad mxnet::op::mshadow_op::arctan_grad mxnet::op::mshadow_op::cosh mxnet::op::mshadow_op::rpower mxnet::op::mshadow_op::minimum mxnet::op::mshadow_op::arctan mxnet::op::mshadow_op::reciprocal_square_root mxnet::op::mshadow_op::rminus mxnet::op::mshadow_op::arccosh_grad mxnet::op::mshadow_op::square_root_grad mxnet::op::mshadow_op::arctanh mxnet::op::mshadow_op::floor mxnet::op::mshadow_op::cosh_grad mxnet::op::mshadow_op::ceil mxnet::op::mshadow_op::cos_grad mxnet::op::mshadow_op::reciprocal_cube_root_grad mxnet::op::mshadow_op::arcsinh_grad mxnet::op::mshadow_op::sin mxnet::op::mshadow_op::arcsin mxnet::op::mshadow_op::log10_grad mxnet::op::mshadow_op::log1p_grad mxnet::op::mshadow_op::mod_grad mxnet::op::mshadow_op::arccos_grad mxnet::op::mshadow_op::exp mxnet::op::mshadow_op::tanh_grad mxnet::op::mshadow_op::log1p mxnet::op::mshadow_op::rint mshadow::op::minus mxnet::op::mshadow_op::relu_grad mxnet::op::mshadow_op::identity mxnet::op::mshadow_op::maximum mxnet::op::mshadow_op::reciprocal_grad mshadow::op::div mxnet::op::mshadow_op::rmod_grad mxnet::op::mshadow_op::arcsin_grad mxnet::op::mshadow_op::ge mxnet::op::mshadow_op::gammaln_grad mxnet::op::mshadow_op::sigmoid mxnet::op::mshadow_op::power_rgrad mxnet::op::mshadow_op::identity_grad mxnet::op::mshadow_op::tan mxnet::op::mshadow_op::gamma mxnet::op::mshadow_op::arcsinh mshadow::op::identity mxnet::op::mshadow_op::square_root mxnet::op::mshadow_op::reciprocal_square_root_grad mxnet::op::mshadow_op::cos mxnet::op::mshadow_op::log2 mxnet::op::mshadow_op::tanh mxnet::op::mshadow_op::arccosh mxnet::op::mshadow_op::negation mxnet::op::mshadow_op::log10 mxnet::op::mshadow_op::cube_root_grad mxnet::op::mshadow_op::expm1 mxnet::op::mshadow_op::arccos mxnet::op::mshadow_op::rmod mxnet::op::mshadow_op::softrelu_grad mxnet::op::mshadow_op::sinh mxnet::op::mshadow_op::log_grad mxnet::op::mshadow_op::sin_grad mxnet::op::mshadow_op::rdiv_grad mxnet::op::mshadow_op::log mxnet::op::mshadow_op::softrelu mxnet::op::mshadow_op::square_grad mxnet::op::mshadow_op::log2_grad mxnet::op::mshadow_op::cube_root mxnet::op::mshadow_op::reciprocal_cube_root mxnet::op::mshadow_op::sign mxnet::op::mshadow_op::square mxnet::op::mshadow_op::sign_grad mxnet::op::mshadow_op::round mxnet::op::mshadow_op::trunc mxnet::op::mshadow_op::mod_rgrad mxnet::op::mshadow_op::reciprocal mxnet::op::mshadow_op::fix mxnet::op::mshadow_op::gamma_grad mxnet::op::mshadow_op::gammaln mxnet::op::mshadow_op::degrees mshadow::op::right mxnet::op::mshadow_op::sinh_grad mxnet::op::mshadow_op::degrees_grad mshadow::op::plus mxnet::op::mshadow_op::radians mxnet::op::mshadow_op::sigmoid_grad mxnet::op::mshadow_op::radians_grad mxnet::op::mshadow_op::gt mxnet::op::mshadow_op::mod mshadow::op::mul mxnet::op::mshadow_op::rdiv mxnet::op::mshadow_op::tan_grad mxnet::op::mshadow_op::div_grad mxnet::op::mshadow_op::div_rgrad mxnet::op::mshadow_op::left mxnet::op::mshadow_op::right mxnet::op::mshadow_op::power mxnet::op::mshadow_op::power_grad mxnet::op::mshadow_op::relu mxnet::op::mshadow_op::abs mxnet::op::mshadow_op::rpower_grad [ OK ] OMP_TUNING.ShowAllTunedOps (0 ms) [ RUN ] OMP_TUNING.ExecuteBidirectional RunCoreOpBidirectional op.inputs()[0]: [dense] main shape: (5, 5) [-3.64523 , 3.35009 , 4.68868 , -2.78966 , -1.91833 ] [0.47221 , -3.11618 , 4.92881 , 4.96461 , 4.67695 ] [2.25839 , 4.81110 , -3.90138 , 2.98106 , -2.02971 ] [-4.95217 , -3.87535 , 1.39763 , 3.78431 , 0.03663 ] [2.97929 , -1.38706 , -2.88076 , 1.81360 , -1.01261 ] op.inputs()[1]: [dense] main shape: (5, 5) [2.40647 , -0.25241 , -0.77912 , -3.26135 , -1.98087 ] [2.97280 , -1.83450 , 3.72429 , -3.50886 , 4.94068 ] [3.21903 , -3.74817 , 2.63750 , -0.09411 , 1.63606 ] [-3.74103 , -2.89791 , -4.48784 , -4.63559 , -0.91269 ] [-0.42011 , -0.12431 , 2.93975 , 4.20875 , 3.07531 ] RunCoreOpBidirectional op.outputs()[0]: [dense] main shape: (5, 5) [0.00000 , 0.00000 , 0.00000 , 0.00000 , 0.00000 ] [0.00000 , 0.00000 , 0.00000 , 0.00000 , 0.00000 ] [0.00000 , 0.00000 , 0.00000 , 0.00000 , 0.00000 ] [0.00000 , 0.00000 , 0.00000 , 0.00000 , 0.00000 ] [0.00000 , 0.00000 , 0.00000 , 0.00000 , 0.00000 ] RunCoreOpBidirectional op.outputs()[0]: [dense] main shape: (5, 5) [-1.23876 , 3.09767 , 3.90955 , -6.05101 , -3.89920 ] [3.44500 , -4.95068 , 8.65310 , 1.45575 , 9.61763 ] [5.47742 , 1.06292 , -1.26388 , 2.88695 , -0.39365 ] [-8.69320 , -6.77326 , -3.09020 , -0.85128 , -0.87606 ] [2.55918 , -1.51137 , 0.05899 , 6.02234 , 2.06270 ] RunCoreOpBidirectional op.bwd_inputs()[0]: [dense] main shape: (5, 5) [-3.64523 , 3.35009 , 4.68868 , -2.78966 , -1.91833 ] [0.47221 , -3.11618 , 4.92881 , 4.96461 , 4.67695 ] [2.25839 , 4.81110 , -3.90138 , 2.98106 , -2.02971 ] [-4.95217 , -3.87535 , 1.39763 , 3.78431 , 0.03663 ] [2.97929 , -1.38706 , -2.88076 , 1.81360 , -1.01261 ] RunCoreOpBidirectional op.bwd_outputs()[0]: [dense] main shape: (5, 5) [0.00000 , 0.00000 , 0.00000 , 0.00000 , 0.00000 ] [0.00000 , 0.00000 , 0.00000 , 0.00000 , 0.00000 ] [0.00000 , 0.00000 , 0.00000 , 0.00000 , 0.00000 ] [0.00000 , 0.00000 , 0.00000 , 0.00000 , 0.00000 ] [0.00000 , 0.00000 , 0.00000 , 0.00000 , 0.00000 ] op.bwd_outputs()[1]: [dense] main shape: (5, 5) [0.00000 , 0.00000 , 0.00000 , 0.00000 , 0.00000 ] [0.00000 , 0.00000 , 0.00000 , 0.00000 , 0.00000 ] [0.00000 , 0.00000 , 0.00000 , 0.00000 , 0.00000 ] [0.00000 , 0.00000 , 0.00000 , 0.00000 , 0.00000 ] [0.00000 , 0.00000 , 0.00000 , 0.00000 , 0.00000 ] RunCoreOpBidirectional op.bwd_outputs()[0]: [dense] main shape: (5, 5) [-3.64523 , 3.35009 , 4.68868 , -2.78966 , -1.91833 ] [0.47221 , -3.11618 , 4.92881 , 4.96461 , 4.67695 ] [2.25839 , 4.81110 , -3.90138 , 2.98106 , -2.02971 ] [-4.95217 , -3.87535 , 1.39763 , 3.78431 , 0.03663 ] [2.97929 , -1.38706 , -2.88076 , 1.81360 , -1.01261 ] op.bwd_outputs()[1]: [dense] main shape: (5, 5) [-3.64523 , 3.35009 , 4.68868 , -2.78966 , -1.91833 ] [0.47221 , -3.11618 , 4.92881 , 4.96461 , 4.67695 ] [2.25839 , 4.81110 , -3.90138 , 2.98106 , -2.02971 ] [-4.95217 , -3.87535 , 1.39763 , 3.78431 , 0.03663 ] [2.97929 , -1.38706 , -2.88076 , 1.81360 , -1.01261 ] [ OK ] OMP_TUNING.ExecuteBidirectional (1 ms) [ RUN ] OMP_TUNING.EvaluateTuneTestFloat ****************************** Operators: relu, for type: float ****************************** AlwaysOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] Auto Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] NeverOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] [1,1,28,28] lhs=784 items (Forward) NeverOMP: 0.01274 ms Auto: 0.05793 ms (NeverOMP) AlwaysOMP: 2.62522 ms [1,3,28,28] lhs=2,352 items (Forward) AlwaysOMP: 0.01055 ms Auto: 0.01109 ms (NeverOMP) NeverOMP: 0.01115 ms [50,1,18,32] lhs=28,800 items (Forward) Auto: 0.26750 ms (AlwaysOMP) AlwaysOMP: 0.34115 ms NeverOMP: 0.45208 ms [25,3,64,64] lhs=307,200 items (Forward) Auto: 0.06531 ms (AlwaysOMP) AlwaysOMP: 0.06715 ms NeverOMP: 0.10477 ms [10,3,128,128] lhs=491,520 items (Forward) NeverOMP: 0.08876 ms Auto: 0.09173 ms (NeverOMP) AlwaysOMP: 0.88800 ms [20,3,256,256] lhs=3,932,160 items (Forward) NeverOMP: 0.35169 ms Auto: 0.35844 ms (NeverOMP) AlwaysOMP: 0.57073 ms [1,1,28,28] lhs=784 items (Backward) NeverOMP: 0.00045 ms Auto: 0.00046 ms (NeverOMP) AlwaysOMP: 2.52499 ms [1,3,28,28] lhs=2,352 items (Backward) NeverOMP: 0.00102 ms Auto: 0.00103 ms (NeverOMP) AlwaysOMP: 0.01052 ms [50,1,18,32] lhs=28,800 items (Backward) NeverOMP: 0.01201 ms Auto: 0.23004 ms (AlwaysOMP) AlwaysOMP: 0.30350 ms *** WARNING: Wrong OMP state selected *** [25,3,64,64] lhs=307,200 items (Backward) Auto: 0.03321 ms (AlwaysOMP) AlwaysOMP: 0.04220 ms NeverOMP: 0.13843 ms [10,3,128,128] lhs=491,520 items (Backward) Auto: 0.05195 ms (NeverOMP) NeverOMP: 0.21861 ms AlwaysOMP: 0.73228 ms [20,3,256,256] lhs=3,932,160 items (Backward) Auto: 0.55728 ms (AlwaysOMP) AlwaysOMP: 0.63385 ms NeverOMP: 3.51795 ms ****************************** Operators: sigmoid, for type: float ****************************** AlwaysOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] Auto Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] NeverOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] [1,1,28,28] lhs=784 items (Forward) NeverOMP: 0.00871 ms Auto: 0.00885 ms (NeverOMP) AlwaysOMP: 0.01319 ms [1,3,28,28] lhs=2,352 items (Forward) Auto: 0.01421 ms (AlwaysOMP) AlwaysOMP: 0.01460 ms NeverOMP: 0.02580 ms [50,1,18,32] lhs=28,800 items (Forward) Auto: 0.05234 ms (AlwaysOMP) AlwaysOMP: 0.27000 ms NeverOMP: 0.31391 ms [25,3,64,64] lhs=307,200 items (Forward) AlwaysOMP: 0.52019 ms Auto: 0.53467 ms (AlwaysOMP) NeverOMP: 3.37814 ms [10,3,128,128] lhs=491,520 items (Forward) Auto: 0.81491 ms (AlwaysOMP) AlwaysOMP: 1.28339 ms NeverOMP: 5.40001 ms [20,3,256,256] lhs=3,932,160 items (Forward) Auto: 5.87947 ms (AlwaysOMP) AlwaysOMP: 6.07391 ms NeverOMP: 43.13438 ms [1,1,28,28] lhs=784 items (Backward) NeverOMP: 0.00023 ms Auto: 0.00025 ms (NeverOMP) AlwaysOMP: 0.01260 ms [1,3,28,28] lhs=2,352 items (Backward) NeverOMP: 0.00057 ms Auto: 0.00095 ms (NeverOMP) AlwaysOMP: 0.01155 ms [50,1,18,32] lhs=28,800 items (Backward) Auto: 0.01123 ms (NeverOMP) NeverOMP: 0.01148 ms AlwaysOMP: 0.16594 ms [25,3,64,64] lhs=307,200 items (Backward) AlwaysOMP: 0.03196 ms Auto: 0.05404 ms (AlwaysOMP) NeverOMP: 0.12231 ms [10,3,128,128] lhs=491,520 items (Backward) Auto: 0.06230 ms (NeverOMP) NeverOMP: 0.19866 ms AlwaysOMP: 0.48368 ms [20,3,256,256] lhs=3,932,160 items (Backward) AlwaysOMP: 0.62509 ms Auto: 1.00250 ms (AlwaysOMP) NeverOMP: 3.43277 ms ****************************** Operators: sqrt, for type: float ****************************** AlwaysOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] Auto Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] NeverOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] [1,1,28,28] lhs=784 items (Forward) Auto: 0.00383 ms (NeverOMP) NeverOMP: 0.00539 ms AlwaysOMP: 0.01253 ms [1,3,28,28] lhs=2,352 items (Forward) AlwaysOMP: 0.01194 ms NeverOMP: 0.02052 ms Auto: 2.21223 ms (NeverOMP) *** WARNING: Wrong OMP state selected *** [50,1,18,32] lhs=28,800 items (Forward) AlwaysOMP: 0.11293 ms NeverOMP: 0.32431 ms Auto: 0.36745 ms (NeverOMP) *** WARNING: Wrong OMP state selected *** [25,3,64,64] lhs=307,200 items (Forward) AlwaysOMP: 0.40343 ms Auto: 0.45430 ms (AlwaysOMP) NeverOMP: 3.50668 ms [10,3,128,128] lhs=491,520 items (Forward) Auto: 0.63273 ms (AlwaysOMP) AlwaysOMP: 1.99602 ms NeverOMP: 5.61935 ms [20,3,256,256] lhs=3,932,160 items (Forward) AlwaysOMP: 4.72035 ms Auto: 4.76982 ms (AlwaysOMP) NeverOMP: 45.22244 ms [1,1,28,28] lhs=784 items (Backward) Auto: 0.00052 ms (NeverOMP) NeverOMP: 0.00077 ms AlwaysOMP: 0.01212 ms [1,3,28,28] lhs=2,352 items (Backward) Auto: 0.00161 ms (NeverOMP) NeverOMP: 0.00175 ms AlwaysOMP: 0.01098 ms [50,1,18,32] lhs=28,800 items (Backward) NeverOMP: 0.01560 ms AlwaysOMP: 0.07547 ms Auto: 0.30127 ms (AlwaysOMP) *** WARNING: Wrong OMP state selected *** [25,3,64,64] lhs=307,200 items (Backward) AlwaysOMP: 0.04790 ms Auto: 0.04824 ms (AlwaysOMP) NeverOMP: 0.16509 ms [10,3,128,128] lhs=491,520 items (Backward) Auto: 0.06901 ms (NeverOMP) NeverOMP: 0.26659 ms AlwaysOMP: 1.28594 ms [20,3,256,256] lhs=3,932,160 items (Backward) Auto: 0.59342 ms (AlwaysOMP) AlwaysOMP: 0.61655 ms NeverOMP: 3.44818 ms ****************************** Operators: elemwise_add, _backward_add for type: float ****************************** AlwaysOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] Auto Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] NeverOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] [1,1,28,28] lhs=784 items (Forward) Auto: 0.00022 ms (NeverOMP) NeverOMP: 0.00022 ms AlwaysOMP: 0.01320 ms [1,3,28,28] lhs=2,352 items (Forward) Auto: 0.00050 ms (NeverOMP) NeverOMP: 0.00050 ms AlwaysOMP: 0.01639 ms [50,1,18,32] lhs=28,800 items (Forward) NeverOMP: 0.00967 ms Auto: 0.00976 ms (NeverOMP) AlwaysOMP: 0.13730 ms [25,3,64,64] lhs=307,200 items (Forward) Auto: 0.07592 ms (AlwaysOMP) AlwaysOMP: 0.13392 ms NeverOMP: 0.14286 ms [10,3,128,128] lhs=491,520 items (Forward) Auto: 0.09538 ms (AlwaysOMP) AlwaysOMP: 0.16401 ms NeverOMP: 0.22948 ms [20,3,256,256] lhs=3,932,160 items (Forward) AlwaysOMP: 0.91316 ms Auto: 1.41485 ms (AlwaysOMP) NeverOMP: 3.73508 ms [1,1,28,28] lhs=784 items (Backward) NeverOMP: 0.00020 ms Auto: 0.00022 ms (NeverOMP) AlwaysOMP: 0.02659 ms [1,3,28,28] lhs=2,352 items (Backward) Auto: 0.00061 ms (NeverOMP) NeverOMP: 0.00062 ms AlwaysOMP: 0.02172 ms [50,1,18,32] lhs=28,800 items (Backward) NeverOMP: 0.01316 ms Auto: 0.01328 ms (NeverOMP) AlwaysOMP: 0.16724 ms [25,3,64,64] lhs=307,200 items (Backward) Auto: 0.05203 ms (AlwaysOMP) AlwaysOMP: 0.06131 ms NeverOMP: 0.17841 ms [10,3,128,128] lhs=491,520 items (Backward) Auto: 0.06787 ms (AlwaysOMP) AlwaysOMP: 0.07053 ms NeverOMP: 0.28583 ms [20,3,256,256] lhs=3,932,160 items (Backward) AlwaysOMP: 0.86155 ms Auto: 1.93980 ms (AlwaysOMP) NeverOMP: 4.71381 ms ****************************** Operators: elemwise_mul, _backward_mul for type: float ****************************** AlwaysOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] Auto Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] NeverOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] [1,1,28,28] lhs=784 items (Forward) NeverOMP: 0.00020 ms Auto: 0.00021 ms (NeverOMP) AlwaysOMP: 0.01167 ms [1,3,28,28] lhs=2,352 items (Forward) NeverOMP: 0.00049 ms Auto: 0.00050 ms (NeverOMP) AlwaysOMP: 0.01483 ms [50,1,18,32] lhs=28,800 items (Forward) Auto: 0.01169 ms (NeverOMP) NeverOMP: 0.01178 ms AlwaysOMP: 0.79230 ms [25,3,64,64] lhs=307,200 items (Forward) AlwaysOMP: 0.06432 ms Auto: 0.09162 ms (AlwaysOMP) NeverOMP: 0.14251 ms [10,3,128,128] lhs=491,520 items (Forward) AlwaysOMP: 0.15359 ms Auto: 0.22631 ms (NeverOMP) NeverOMP: 0.24137 ms *** WARNING: Wrong OMP state selected *** [20,3,256,256] lhs=3,932,160 items (Forward) AlwaysOMP: 0.73271 ms Auto: 1.77178 ms (AlwaysOMP) NeverOMP: 3.87031 ms [1,1,28,28] lhs=784 items (Backward) NeverOMP: 0.00031 ms Auto: 0.00033 ms (NeverOMP) AlwaysOMP: 0.02343 ms [1,3,28,28] lhs=2,352 items (Backward) NeverOMP: 0.00091 ms Auto: 0.00095 ms (NeverOMP) AlwaysOMP: 0.02028 ms [50,1,18,32] lhs=28,800 items (Backward) Auto: 0.02208 ms (NeverOMP) NeverOMP: 0.02217 ms AlwaysOMP: 1.02931 ms [25,3,64,64] lhs=307,200 items (Backward) AlwaysOMP: 0.06572 ms Auto: 0.06830 ms (AlwaysOMP) NeverOMP: 0.24432 ms [10,3,128,128] lhs=491,520 items (Backward) Auto: 0.13269 ms (AlwaysOMP) AlwaysOMP: 0.18089 ms NeverOMP: 0.40097 ms [20,3,256,256] lhs=3,932,160 items (Backward) AlwaysOMP: 3.43456 ms Auto: 4.17172 ms (AlwaysOMP) NeverOMP: 6.87194 ms ****************************** Operators: elemwise_div, _backward_div for type: float ****************************** AlwaysOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] Auto Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] NeverOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] [1,1,28,28] lhs=784 items (Forward) NeverOMP: 0.00052 ms Auto: 0.00055 ms (NeverOMP) AlwaysOMP: 0.01196 ms [1,3,28,28] lhs=2,352 items (Forward) Auto: 0.00139 ms (NeverOMP) NeverOMP: 0.00141 ms AlwaysOMP: 0.01440 ms [50,1,18,32] lhs=28,800 items (Forward) NeverOMP: 0.01807 ms AlwaysOMP: 0.02894 ms Auto: 0.09708 ms (AlwaysOMP) *** WARNING: Wrong OMP state selected *** [25,3,64,64] lhs=307,200 items (Forward) NeverOMP: 0.19007 ms Auto: 0.21273 ms (AlwaysOMP) AlwaysOMP: 0.22055 ms [10,3,128,128] lhs=491,520 items (Forward) Auto: 0.11796 ms (AlwaysOMP) AlwaysOMP: 0.21508 ms NeverOMP: 0.31321 ms [20,3,256,256] lhs=3,932,160 items (Forward) Auto: 0.81411 ms (AlwaysOMP) AlwaysOMP: 1.08806 ms NeverOMP: 3.86872 ms [1,1,28,28] lhs=784 items (Backward) NeverOMP: 0.00093 ms Auto: 0.00099 ms (NeverOMP) AlwaysOMP: 0.02483 ms [1,3,28,28] lhs=2,352 items (Backward) NeverOMP: 0.00263 ms Auto: 0.00267 ms (NeverOMP) AlwaysOMP: 0.02132 ms [50,1,18,32] lhs=28,800 items (Backward) AlwaysOMP: 0.02501 ms NeverOMP: 0.03186 ms Auto: 0.12954 ms (NeverOMP) *** WARNING: Wrong OMP state selected *** [25,3,64,64] lhs=307,200 items (Backward) Auto: 0.21537 ms (AlwaysOMP) AlwaysOMP: 0.30311 ms NeverOMP: 0.33987 ms [10,3,128,128] lhs=491,520 items (Backward) Auto: 0.12973 ms (AlwaysOMP) AlwaysOMP: 0.16991 ms NeverOMP: 0.55671 ms [20,3,256,256] lhs=3,932,160 items (Backward) Auto: 3.60176 ms (AlwaysOMP) AlwaysOMP: 3.88339 ms NeverOMP: 7.86455 ms Success rate for type float: 0.90278 [ OK ] OMP_TUNING.EvaluateTuneTestFloat (2659740 ms) [ RUN ] OMP_TUNING.EvaluateTuneTestDouble ****************************** Operators: relu, for type: double ****************************** AlwaysOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] Auto Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] NeverOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] [1,1,28,28] lhs=784 items (Forward) NeverOMP: 0.01231 ms AlwaysOMP: 0.01291 ms Auto: 0.64883 ms (AlwaysOMP) *** WARNING: Wrong OMP state selected *** [1,3,28,28] lhs=2,352 items (Forward) Auto: 0.01006 ms (AlwaysOMP) AlwaysOMP: 0.01108 ms NeverOMP: 0.01116 ms [50,1,18,32] lhs=28,800 items (Forward) NeverOMP: 0.04952 ms AlwaysOMP: 0.23854 ms Auto: 0.25969 ms (AlwaysOMP) *** WARNING: Wrong OMP state selected *** [25,3,64,64] lhs=307,200 items (Forward) AlwaysOMP: 0.10319 ms NeverOMP: 0.11228 ms Auto: 0.12168 ms (NeverOMP) [10,3,128,128] lhs=491,520 items (Forward) Auto: 0.11005 ms (NeverOMP) NeverOMP: 0.11388 ms AlwaysOMP: 0.12235 ms [20,3,256,256] lhs=3,932,160 items (Forward) NeverOMP: 1.18216 ms Auto: 1.29683 ms (NeverOMP) AlwaysOMP: 1.86238 ms [1,1,28,28] lhs=784 items (Backward) NeverOMP: 0.00070 ms Auto: 0.00074 ms (NeverOMP) AlwaysOMP: 0.01238 ms [1,3,28,28] lhs=2,352 items (Backward) NeverOMP: 0.00181 ms Auto: 0.00189 ms (NeverOMP) AlwaysOMP: 0.01149 ms [50,1,18,32] lhs=28,800 items (Backward) NeverOMP: 0.02611 ms AlwaysOMP: 0.16457 ms Auto: 0.23180 ms (AlwaysOMP) *** WARNING: Wrong OMP state selected *** [25,3,64,64] lhs=307,200 items (Backward) AlwaysOMP: 0.06236 ms Auto: 0.06312 ms (AlwaysOMP) NeverOMP: 0.26942 ms [10,3,128,128] lhs=491,520 items (Backward) Auto: 0.09348 ms (AlwaysOMP) AlwaysOMP: 0.09556 ms NeverOMP: 0.42475 ms [20,3,256,256] lhs=3,932,160 items (Backward) Auto: 4.09750 ms (AlwaysOMP) AlwaysOMP: 4.46106 ms NeverOMP: 6.96236 ms ****************************** Operators: sigmoid, for type: double ****************************** AlwaysOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] Auto Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] NeverOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] [1,1,28,28] lhs=784 items (Forward) AlwaysOMP: 0.01446 ms NeverOMP: 0.06756 ms Auto: 3.67305 ms (NeverOMP) *** WARNING: Wrong OMP state selected *** [1,3,28,28] lhs=2,352 items (Forward) AlwaysOMP: 0.01836 ms NeverOMP: 0.20214 ms Auto: 2.10966 ms (NeverOMP) *** WARNING: Wrong OMP state selected *** [50,1,18,32] lhs=28,800 items (Forward) Auto: 0.14297 ms (AlwaysOMP) AlwaysOMP: 0.15284 ms NeverOMP: 2.48021 ms [25,3,64,64] lhs=307,200 items (Forward) AlwaysOMP: 2.23927 ms Auto: 2.30439 ms (AlwaysOMP) NeverOMP: 26.39251 ms [10,3,128,128] lhs=491,520 items (Forward) AlwaysOMP: 3.63259 ms Auto: 3.70522 ms (AlwaysOMP) NeverOMP: 42.20685 ms [20,3,256,256] lhs=3,932,160 items (Forward) AlwaysOMP: 18.12477 ms Auto: 18.21174 ms (AlwaysOMP) NeverOMP: 338.51523 ms [1,1,28,28] lhs=784 items (Backward) NeverOMP: 0.00036 ms Auto: 0.00073 ms (NeverOMP) AlwaysOMP: 0.01226 ms [1,3,28,28] lhs=2,352 items (Backward) NeverOMP: 0.00114 ms Auto: 0.00171 ms (NeverOMP) AlwaysOMP: 0.01089 ms [50,1,18,32] lhs=28,800 items (Backward) AlwaysOMP: 0.01255 ms Auto: 0.01256 ms (AlwaysOMP) NeverOMP: 0.02261 ms [25,3,64,64] lhs=307,200 items (Backward) AlwaysOMP: 0.05870 ms Auto: 0.11129 ms (AlwaysOMP) NeverOMP: 0.24777 ms [10,3,128,128] lhs=491,520 items (Backward) AlwaysOMP: 0.09064 ms NeverOMP: 0.42901 ms Auto: 0.45738 ms (NeverOMP) *** WARNING: Wrong OMP state selected *** [20,3,256,256] lhs=3,932,160 items (Backward) AlwaysOMP: 4.16618 ms Auto: 4.36901 ms (AlwaysOMP) NeverOMP: 6.97601 ms ****************************** Operators: sqrt, for type: double ****************************** AlwaysOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] Auto Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] NeverOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] [1,1,28,28] lhs=784 items (Forward) NeverOMP: 0.00404 ms Auto: 0.00559 ms (NeverOMP) AlwaysOMP: 0.01396 ms [1,3,28,28] lhs=2,352 items (Forward) AlwaysOMP: 0.01227 ms NeverOMP: 0.01748 ms Auto: 0.02051 ms (NeverOMP) *** WARNING: Wrong OMP state selected *** [50,1,18,32] lhs=28,800 items (Forward) Auto: 0.04786 ms (AlwaysOMP) AlwaysOMP: 0.32350 ms NeverOMP: 0.34281 ms [25,3,64,64] lhs=307,200 items (Forward) Auto: 0.44394 ms (AlwaysOMP) AlwaysOMP: 0.44560 ms NeverOMP: 3.70093 ms [10,3,128,128] lhs=491,520 items (Forward) AlwaysOMP: 0.66167 ms Auto: 0.73043 ms (AlwaysOMP) NeverOMP: 5.95128 ms [20,3,256,256] lhs=3,932,160 items (Forward) Auto: 4.97652 ms (AlwaysOMP) AlwaysOMP: 5.54879 ms NeverOMP: 48.06680 ms [1,1,28,28] lhs=784 items (Backward) NeverOMP: 0.00175 ms Auto: 0.00247 ms (NeverOMP) AlwaysOMP: 0.01248 ms [1,3,28,28] lhs=2,352 items (Backward) NeverOMP: 0.00511 ms Auto: 0.00600 ms (NeverOMP) AlwaysOMP: 0.01276 ms [50,1,18,32] lhs=28,800 items (Backward) Auto: 0.01760 ms (NeverOMP) NeverOMP: 0.06169 ms AlwaysOMP: 0.30600 ms [25,3,64,64] lhs=307,200 items (Backward) Auto: 0.12501 ms (AlwaysOMP) AlwaysOMP: 0.12638 ms NeverOMP: 0.65822 ms [10,3,128,128] lhs=491,520 items (Backward) AlwaysOMP: 0.19655 ms Auto: 0.28270 ms (AlwaysOMP) NeverOMP: 1.05510 ms [20,3,256,256] lhs=3,932,160 items (Backward) Auto: 4.15841 ms (AlwaysOMP) AlwaysOMP: 4.40286 ms NeverOMP: 8.74886 ms ****************************** Operators: elemwise_add, _backward_add for type: double ****************************** AlwaysOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] Auto Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] NeverOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] [1,1,28,28] lhs=784 items (Forward) NeverOMP: 0.00034 ms Auto: 0.00035 ms (NeverOMP) AlwaysOMP: 0.01223 ms [1,3,28,28] lhs=2,352 items (Forward) Auto: 0.00114 ms (NeverOMP) NeverOMP: 0.00118 ms AlwaysOMP: 0.01138 ms [50,1,18,32] lhs=28,800 items (Forward) NeverOMP: 0.02635 ms Auto: 0.02653 ms (NeverOMP) AlwaysOMP: 0.14712 ms [25,3,64,64] lhs=307,200 items (Forward) AlwaysOMP: 0.10704 ms Auto: 0.25234 ms (NeverOMP) NeverOMP: 0.28560 ms *** WARNING: Wrong OMP state selected *** [10,3,128,128] lhs=491,520 items (Forward) AlwaysOMP: 0.13664 ms Auto: 0.29605 ms (AlwaysOMP) NeverOMP: 0.49628 ms [20,3,256,256] lhs=3,932,160 items (Forward) AlwaysOMP: 3.36596 ms Auto: 3.40262 ms (AlwaysOMP) NeverOMP: 7.59803 ms [1,1,28,28] lhs=784 items (Backward) NeverOMP: 0.00040 ms Auto: 0.00041 ms (NeverOMP) AlwaysOMP: 0.02417 ms [1,3,28,28] lhs=2,352 items (Backward) Auto: 0.00156 ms (NeverOMP) NeverOMP: 0.00159 ms AlwaysOMP: 0.02110 ms [50,1,18,32] lhs=28,800 items (Backward) Auto: 0.03318 ms (NeverOMP) NeverOMP: 0.03328 ms AlwaysOMP: 0.16459 ms [25,3,64,64] lhs=307,200 items (Backward) AlwaysOMP: 0.08291 ms Auto: 0.11832 ms (AlwaysOMP) NeverOMP: 0.35871 ms [10,3,128,128] lhs=491,520 items (Backward) AlwaysOMP: 0.12856 ms Auto: 0.14460 ms (AlwaysOMP) NeverOMP: 0.61437 ms [20,3,256,256] lhs=3,932,160 items (Backward) Auto: 5.80656 ms (AlwaysOMP) AlwaysOMP: 5.92776 ms NeverOMP: 10.08328 ms ****************************** Operators: elemwise_mul, _backward_mul for type: double ****************************** AlwaysOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] Auto Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] NeverOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] [1,1,28,28] lhs=784 items (Forward) Auto: 0.00035 ms (NeverOMP) NeverOMP: 0.00037 ms AlwaysOMP: 0.01251 ms [1,3,28,28] lhs=2,352 items (Forward) Auto: 0.00114 ms (NeverOMP) NeverOMP: 0.00119 ms AlwaysOMP: 0.01653 ms [50,1,18,32] lhs=28,800 items (Forward) Auto: 0.02269 ms (NeverOMP) NeverOMP: 0.02282 ms AlwaysOMP: 0.89855 ms [25,3,64,64] lhs=307,200 items (Forward) AlwaysOMP: 0.10723 ms Auto: 0.11643 ms (AlwaysOMP) NeverOMP: 0.30169 ms [10,3,128,128] lhs=491,520 items (Forward) AlwaysOMP: 0.12962 ms Auto: 0.13127 ms (AlwaysOMP) NeverOMP: 0.53828 ms [20,3,256,256] lhs=3,932,160 items (Forward) AlwaysOMP: 3.40119 ms Auto: 4.13266 ms (AlwaysOMP) NeverOMP: 7.69186 ms [1,1,28,28] lhs=784 items (Backward) Auto: 0.00058 ms (NeverOMP) NeverOMP: 0.00065 ms AlwaysOMP: 0.02377 ms [1,3,28,28] lhs=2,352 items (Backward) NeverOMP: 0.00205 ms Auto: 0.00208 ms (NeverOMP) AlwaysOMP: 0.02057 ms [50,1,18,32] lhs=28,800 items (Backward) Auto: 0.04450 ms (NeverOMP) NeverOMP: 0.04545 ms AlwaysOMP: 1.00807 ms [25,3,64,64] lhs=307,200 items (Backward) Auto: 0.11819 ms (AlwaysOMP) AlwaysOMP: 0.12076 ms NeverOMP: 0.50590 ms [10,3,128,128] lhs=491,520 items (Backward) AlwaysOMP: 0.17710 ms Auto: 0.19027 ms (AlwaysOMP) NeverOMP: 0.96795 ms [20,3,256,256] lhs=3,932,160 items (Backward) Auto: 8.91745 ms (AlwaysOMP) AlwaysOMP: 9.13549 ms NeverOMP: 13.90586 ms ****************************** Operators: elemwise_div, _backward_div for type: double ****************************** AlwaysOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] Auto Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] NeverOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] [1,1,28,28] lhs=784 items (Forward) NeverOMP: 0.00176 ms Auto: 0.00180 ms (NeverOMP) AlwaysOMP: 0.01206 ms [1,3,28,28] lhs=2,352 items (Forward) Auto: 0.00514 ms (NeverOMP) NeverOMP: 0.00520 ms AlwaysOMP: 0.01556 ms [50,1,18,32] lhs=28,800 items (Forward) Auto: 0.05224 ms (NeverOMP) NeverOMP: 0.06227 ms AlwaysOMP: 0.29541 ms [25,3,64,64] lhs=307,200 items (Forward) AlwaysOMP: 0.17669 ms Auto: 0.18414 ms (AlwaysOMP) NeverOMP: 0.72627 ms [10,3,128,128] lhs=491,520 items (Forward) AlwaysOMP: 0.23446 ms Auto: 0.23679 ms (AlwaysOMP) NeverOMP: 1.19503 ms [20,3,256,256] lhs=3,932,160 items (Forward) Auto: 3.87970 ms (AlwaysOMP) AlwaysOMP: 4.26268 ms NeverOMP: 9.60302 ms [1,1,28,28] lhs=784 items (Backward) NeverOMP: 0.00345 ms Auto: 0.00355 ms (NeverOMP) AlwaysOMP: 0.02349 ms [1,3,28,28] lhs=2,352 items (Backward) NeverOMP: 0.01018 ms Auto: 0.01027 ms (NeverOMP) AlwaysOMP: 0.02192 ms [50,1,18,32] lhs=28,800 items (Backward) Auto: 0.03786 ms (NeverOMP) NeverOMP: 0.12486 ms AlwaysOMP: 0.49639 ms [25,3,64,64] lhs=307,200 items (Backward) AlwaysOMP: 0.24824 ms Auto: 0.24917 ms (AlwaysOMP) NeverOMP: 1.33615 ms [10,3,128,128] lhs=491,520 items (Backward) Auto: 0.38595 ms (AlwaysOMP) AlwaysOMP: 0.38854 ms NeverOMP: 2.15919 ms [20,3,256,256] lhs=3,932,160 items (Backward) AlwaysOMP: 10.12301 ms Auto: 10.23617 ms (AlwaysOMP) NeverOMP: 18.17344 ms Success rate for type double: 0.88889 [ OK ] OMP_TUNING.EvaluateTuneTestDouble (2873781 ms) [ RUN ] OMP_TUNING.EvaluateTuneTestFloat16 ****************************** Operators: relu, for type: mshadow::half::half_t ****************************** AlwaysOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] Auto Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] NeverOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] [1,1,28,28] lhs=784 items (Forward) AlwaysOMP: 0.01252 ms NeverOMP: 0.01254 ms Auto: 0.01282 ms (NeverOMP) [1,3,28,28] lhs=2,352 items (Forward) AlwaysOMP: 0.01127 ms Auto: 0.01176 ms (NeverOMP) NeverOMP: 0.01198 ms [50,1,18,32] lhs=28,800 items (Forward) AlwaysOMP: 0.04113 ms NeverOMP: 0.04217 ms Auto: 0.60660 ms (NeverOMP) *** WARNING: Wrong OMP state selected *** [25,3,64,64] lhs=307,200 items (Forward) AlwaysOMP: 0.25456 ms NeverOMP: 0.25869 ms Auto: 0.28440 ms (NeverOMP) [10,3,128,128] lhs=491,520 items (Forward) Auto: 0.37191 ms (AlwaysOMP) AlwaysOMP: 0.37813 ms NeverOMP: 0.38086 ms [20,3,256,256] lhs=3,932,160 items (Forward) Auto: 2.75328 ms (AlwaysOMP) AlwaysOMP: 2.90596 ms NeverOMP: 3.34627 ms [1,1,28,28] lhs=784 items (Backward) Auto: 0.00487 ms (NeverOMP) NeverOMP: 0.00502 ms AlwaysOMP: 0.01354 ms [1,3,28,28] lhs=2,352 items (Backward) AlwaysOMP: 0.01159 ms Auto: 0.01426 ms (NeverOMP) NeverOMP: 0.01428 ms [50,1,18,32] lhs=28,800 items (Backward) AlwaysOMP: 0.02205 ms NeverOMP: 0.17007 ms Auto: 0.17041 ms (NeverOMP) *** WARNING: Wrong OMP state selected *** [25,3,64,64] lhs=307,200 items (Backward) AlwaysOMP: 0.17643 ms Auto: 0.18014 ms (AlwaysOMP) NeverOMP: 1.36678 ms [10,3,128,128] lhs=491,520 items (Backward) Auto: 0.27613 ms (AlwaysOMP) AlwaysOMP: 0.27733 ms NeverOMP: 2.14243 ms [20,3,256,256] lhs=3,932,160 items (Backward) Auto: 1.84882 ms (AlwaysOMP) AlwaysOMP: 1.96296 ms NeverOMP: 17.23174 ms ****************************** Operators: sigmoid, for type: mshadow::half::half_t ****************************** AlwaysOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] Auto Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] NeverOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] [1,1,28,28] lhs=784 items (Forward) AlwaysOMP: 0.01798 ms NeverOMP: 0.05002 ms Auto: 0.07911 ms (NeverOMP) *** WARNING: Wrong OMP state selected *** [1,3,28,28] lhs=2,352 items (Forward) AlwaysOMP: 0.01914 ms NeverOMP: 0.14894 ms Auto: 1.55216 ms (NeverOMP) *** WARNING: Wrong OMP state selected *** [50,1,18,32] lhs=28,800 items (Forward) AlwaysOMP: 0.13877 ms NeverOMP: 1.82655 ms Auto: 2.34716 ms (NeverOMP) *** WARNING: Wrong OMP state selected *** [25,3,64,64] lhs=307,200 items (Forward) AlwaysOMP: 1.90271 ms Auto: 1.96576 ms (AlwaysOMP) NeverOMP: 19.46052 ms [10,3,128,128] lhs=491,520 items (Forward) Auto: 3.43295 ms (AlwaysOMP) AlwaysOMP: 3.50219 ms NeverOMP: 31.01322 ms [20,3,256,256] lhs=3,932,160 items (Forward) AlwaysOMP: 18.20243 ms Auto: 18.73620 ms (AlwaysOMP) NeverOMP: 247.66399 ms [1,1,28,28] lhs=784 items (Backward) NeverOMP: 0.00626 ms Auto: 0.00830 ms (NeverOMP) AlwaysOMP: 0.01404 ms [1,3,28,28] lhs=2,352 items (Backward) AlwaysOMP: 0.01204 ms NeverOMP: 0.01874 ms Auto: 0.02282 ms (NeverOMP) *** WARNING: Wrong OMP state selected *** [50,1,18,32] lhs=28,800 items (Backward) AlwaysOMP: 0.02567 ms NeverOMP: 0.22798 ms Auto: 0.28531 ms (NeverOMP) *** WARNING: Wrong OMP state selected *** [25,3,64,64] lhs=307,200 items (Backward) AlwaysOMP: 0.28227 ms Auto: 0.37852 ms (AlwaysOMP) NeverOMP: 2.42536 ms [10,3,128,128] lhs=491,520 items (Backward) Auto: 0.51133 ms (AlwaysOMP) AlwaysOMP: 0.55373 ms NeverOMP: 3.86420 ms [20,3,256,256] lhs=3,932,160 items (Backward) Auto: 2.53404 ms (AlwaysOMP) AlwaysOMP: 2.70469 ms NeverOMP: 30.87774 ms ****************************** Operators: sqrt, for type: mshadow::half::half_t ****************************** AlwaysOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] Auto Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] NeverOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] [1,1,28,28] lhs=784 items (Forward) AlwaysOMP: 0.01532 ms Auto: 0.01662 ms (AlwaysOMP) NeverOMP: 0.02072 ms [1,3,28,28] lhs=2,352 items (Forward) AlwaysOMP: 0.01480 ms Auto: 0.01597 ms (AlwaysOMP) NeverOMP: 0.05913 ms [50,1,18,32] lhs=28,800 items (Forward) AlwaysOMP: 0.19763 ms Auto: 0.49256 ms (NeverOMP) NeverOMP: 0.71079 ms *** WARNING: Wrong OMP state selected *** [25,3,64,64] lhs=307,200 items (Forward) AlwaysOMP: 0.96153 ms Auto: 1.05745 ms (AlwaysOMP) NeverOMP: 7.59227 ms [10,3,128,128] lhs=491,520 items (Forward) AlwaysOMP: 1.57041 ms Auto: 1.65070 ms (AlwaysOMP) NeverOMP: 12.16232 ms [20,3,256,256] lhs=3,932,160 items (Forward) AlwaysOMP: 10.01931 ms Auto: 10.22567 ms (AlwaysOMP) NeverOMP: 97.41398 ms [1,1,28,28] lhs=784 items (Backward) Auto: 0.00659 ms (NeverOMP) NeverOMP: 0.00811 ms AlwaysOMP: 0.01394 ms [1,3,28,28] lhs=2,352 items (Backward) AlwaysOMP: 0.01243 ms NeverOMP: 0.02033 ms Auto: 0.02449 ms (NeverOMP) *** WARNING: Wrong OMP state selected *** [50,1,18,32] lhs=28,800 items (Backward) AlwaysOMP: 0.02942 ms NeverOMP: 0.23440 ms Auto: 0.27738 ms (NeverOMP) *** WARNING: Wrong OMP state selected *** [25,3,64,64] lhs=307,200 items (Backward) AlwaysOMP: 0.31331 ms Auto: 0.39453 ms (AlwaysOMP) NeverOMP: 2.49712 ms [10,3,128,128] lhs=491,520 items (Backward) AlwaysOMP: 0.45829 ms Auto: 0.48572 ms (AlwaysOMP) NeverOMP: 4.00355 ms [20,3,256,256] lhs=3,932,160 items (Backward) AlwaysOMP: 2.52095 ms Auto: 2.56029 ms (AlwaysOMP) NeverOMP: 32.01943 ms ****************************** Operators: elemwise_add, _backward_add for type: mshadow::half::half_t ****************************** AlwaysOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] Auto Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] NeverOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] [1,1,28,28] lhs=784 items (Forward) Auto: 0.00339 ms (NeverOMP) NeverOMP: 0.00339 ms AlwaysOMP: 0.01349 ms [1,3,28,28] lhs=2,352 items (Forward) Auto: 0.00981 ms (NeverOMP) NeverOMP: 0.00989 ms AlwaysOMP: 0.01154 ms [50,1,18,32] lhs=28,800 items (Forward) Auto: 0.11873 ms (NeverOMP) NeverOMP: 0.11935 ms AlwaysOMP: 0.21125 ms [25,3,64,64] lhs=307,200 items (Forward) Auto: 0.36312 ms (AlwaysOMP) AlwaysOMP: 0.45465 ms NeverOMP: 1.27802 ms [10,3,128,128] lhs=491,520 items (Forward) Auto: 0.29832 ms (AlwaysOMP) AlwaysOMP: 0.35686 ms NeverOMP: 2.04489 ms [20,3,256,256] lhs=3,932,160 items (Forward) AlwaysOMP: 2.23637 ms Auto: 2.36566 ms (AlwaysOMP) NeverOMP: 16.45618 ms [1,1,28,28] lhs=784 items (Backward) NeverOMP: 0.00056 ms Auto: 0.00060 ms (NeverOMP) AlwaysOMP: 0.02367 ms [1,3,28,28] lhs=2,352 items (Backward) NeverOMP: 0.00155 ms Auto: 0.00163 ms (NeverOMP) AlwaysOMP: 0.02082 ms [50,1,18,32] lhs=28,800 items (Backward) Auto: 0.02367 ms (NeverOMP) NeverOMP: 0.02376 ms AlwaysOMP: 0.17247 ms [25,3,64,64] lhs=307,200 items (Backward) Auto: 0.11430 ms (AlwaysOMP) AlwaysOMP: 0.21681 ms NeverOMP: 0.25327 ms [10,3,128,128] lhs=491,520 items (Backward) Auto: 0.08210 ms (AlwaysOMP) AlwaysOMP: 0.08335 ms NeverOMP: 0.40585 ms [20,3,256,256] lhs=3,932,160 items (Backward) AlwaysOMP: 0.52975 ms Auto: 0.54464 ms (AlwaysOMP) NeverOMP: 3.29317 ms ****************************** Operators: elemwise_mul, _backward_mul for type: mshadow::half::half_t ****************************** AlwaysOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] Auto Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] NeverOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] [1,1,28,28] lhs=784 items (Forward) Auto: 0.00346 ms (NeverOMP) NeverOMP: 0.00359 ms AlwaysOMP: 0.01318 ms [1,3,28,28] lhs=2,352 items (Forward) Auto: 0.01015 ms (NeverOMP) NeverOMP: 0.01017 ms AlwaysOMP: 0.01858 ms [50,1,18,32] lhs=28,800 items (Forward) Auto: 0.12120 ms (NeverOMP) NeverOMP: 0.12243 ms AlwaysOMP: 0.89740 ms [25,3,64,64] lhs=307,200 items (Forward) AlwaysOMP: 0.19254 ms Auto: 0.27997 ms (AlwaysOMP) NeverOMP: 1.29081 ms [10,3,128,128] lhs=491,520 items (Forward) AlwaysOMP: 0.29101 ms Auto: 0.30168 ms (AlwaysOMP) NeverOMP: 2.08216 ms [20,3,256,256] lhs=3,932,160 items (Forward) Auto: 2.63992 ms (AlwaysOMP) AlwaysOMP: 2.92110 ms NeverOMP: 16.43580 ms [1,1,28,28] lhs=784 items (Backward) Auto: 0.02870 ms (AlwaysOMP) AlwaysOMP: 0.02878 ms NeverOMP: 0.03015 ms [1,3,28,28] lhs=2,352 items (Backward) AlwaysOMP: 0.02608 ms NeverOMP: 0.08578 ms Auto: 0.08580 ms (NeverOMP) *** WARNING: Wrong OMP state selected *** [50,1,18,32] lhs=28,800 items (Backward) Auto: 1.05000 ms (NeverOMP) NeverOMP: 1.05704 ms AlwaysOMP: 1.66575 ms [25,3,64,64] lhs=307,200 items (Backward) AlwaysOMP: 1.49586 ms Auto: 1.59033 ms (AlwaysOMP) NeverOMP: 11.05325 ms [10,3,128,128] lhs=491,520 items (Backward) Auto: 2.34071 ms (AlwaysOMP) AlwaysOMP: 2.40784 ms NeverOMP: 17.80856 ms [20,3,256,256] lhs=3,932,160 items (Backward) Auto: 13.36025 ms (AlwaysOMP) AlwaysOMP: 13.74791 ms NeverOMP: 141.20115 ms ****************************** Operators: elemwise_div, _backward_div for type: mshadow::half::half_t ****************************** AlwaysOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] Auto Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] NeverOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] [1,1,28,28] lhs=784 items (Forward) Auto: 0.00381 ms (NeverOMP) NeverOMP: 0.00383 ms AlwaysOMP: 0.01274 ms [1,3,28,28] lhs=2,352 items (Forward) Auto: 0.01124 ms (NeverOMP) NeverOMP: 0.01127 ms AlwaysOMP: 0.01820 ms [50,1,18,32] lhs=28,800 items (Forward) Auto: 0.13582 ms (NeverOMP) NeverOMP: 0.13688 ms AlwaysOMP: 0.31810 ms [25,3,64,64] lhs=307,200 items (Forward) AlwaysOMP: 0.21464 ms Auto: 0.36015 ms (AlwaysOMP) NeverOMP: 1.47631 ms [10,3,128,128] lhs=491,520 items (Forward) AlwaysOMP: 0.37944 ms Auto: 0.60854 ms (AlwaysOMP) NeverOMP: 2.36418 ms [20,3,256,256] lhs=3,932,160 items (Forward) AlwaysOMP: 2.62599 ms Auto: 3.17899 ms (AlwaysOMP) NeverOMP: 18.82332 ms [1,1,28,28] lhs=784 items (Backward) Auto: 0.01382 ms (NeverOMP) NeverOMP: 0.01389 ms AlwaysOMP: 0.02699 ms [1,3,28,28] lhs=2,352 items (Backward) AlwaysOMP: 0.02427 ms Auto: 0.04105 ms (NeverOMP) NeverOMP: 0.04121 ms *** WARNING: Wrong OMP state selected *** [50,1,18,32] lhs=28,800 items (Backward) AlwaysOMP: 0.41580 ms Auto: 0.49959 ms (NeverOMP) NeverOMP: 0.50103 ms [25,3,64,64] lhs=307,200 items (Backward) AlwaysOMP: 0.70414 ms Auto: 0.73576 ms (AlwaysOMP) NeverOMP: 5.34360 ms [10,3,128,128] lhs=491,520 items (Backward) AlwaysOMP: 1.10432 ms Auto: 1.24694 ms (AlwaysOMP) NeverOMP: 8.56778 ms [20,3,256,256] lhs=3,932,160 items (Backward) AlwaysOMP: 6.42306 ms Auto: 6.57874 ms (AlwaysOMP) NeverOMP: 68.38768 ms Success rate for type mshadow::half::half_t: 0.83333 [ OK ] OMP_TUNING.EvaluateTuneTestFloat16 (3410269 ms) [ RUN ] OMP_TUNING.EvaluateTuneTestInt8 ****************************** Operators: relu, for type: unsigned char ****************************** AlwaysOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] Auto Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] NeverOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] [1,1,28,28] lhs=784 items (Forward) Auto: 0.01204 ms (NeverOMP) NeverOMP: 0.01243 ms AlwaysOMP: 0.01337 ms [1,3,28,28] lhs=2,352 items (Forward) AlwaysOMP: 0.01054 ms NeverOMP: 0.01103 ms Auto: 0.01138 ms (NeverOMP) [50,1,18,32] lhs=28,800 items (Forward) Auto: 0.12353 ms (NeverOMP) NeverOMP: 0.14061 ms AlwaysOMP: 0.26674 ms [25,3,64,64] lhs=307,200 items (Forward) NeverOMP: 0.05610 ms Auto: 0.06452 ms (NeverOMP) AlwaysOMP: 0.09663 ms [10,3,128,128] lhs=491,520 items (Forward) Auto: 0.09136 ms (AlwaysOMP) AlwaysOMP: 0.10693 ms NeverOMP: 0.16486 ms [20,3,256,256] lhs=3,932,160 items (Forward) Auto: 0.43558 ms (AlwaysOMP) AlwaysOMP: 0.43619 ms NeverOMP: 0.56705 ms [1,1,28,28] lhs=784 items (Backward) NeverOMP: 0.00032 ms Auto: 0.00037 ms (NeverOMP) AlwaysOMP: 0.01326 ms [1,3,28,28] lhs=2,352 items (Backward) NeverOMP: 0.00065 ms Auto: 0.00068 ms (NeverOMP) AlwaysOMP: 0.01105 ms [50,1,18,32] lhs=28,800 items (Backward) NeverOMP: 0.00515 ms Auto: 0.08562 ms (NeverOMP) AlwaysOMP: 0.22164 ms [25,3,64,64] lhs=307,200 items (Backward) NeverOMP: 0.05499 ms AlwaysOMP: 0.05863 ms Auto: 0.06179 ms (AlwaysOMP) [10,3,128,128] lhs=491,520 items (Backward) AlwaysOMP: 0.08669 ms NeverOMP: 0.08782 ms Auto: 0.10840 ms (NeverOMP) *** WARNING: Wrong OMP state selected *** [20,3,256,256] lhs=3,932,160 items (Backward) NeverOMP: 0.61030 ms Auto: 0.63073 ms (AlwaysOMP) AlwaysOMP: 0.63093 ms ****************************** Operators: sigmoid, for type: unsigned char ****************************** AlwaysOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] Auto Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] NeverOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] [1,1,28,28] lhs=784 items (Forward) Auto: 0.00980 ms (NeverOMP) NeverOMP: 0.01373 ms AlwaysOMP: 0.01650 ms [1,3,28,28] lhs=2,352 items (Forward) AlwaysOMP: 0.01406 ms Auto: 0.01543 ms (AlwaysOMP) NeverOMP: 0.03544 ms [50,1,18,32] lhs=28,800 items (Forward) Auto: 0.31022 ms (NeverOMP) NeverOMP: 0.38270 ms AlwaysOMP: 0.50504 ms [25,3,64,64] lhs=307,200 items (Forward) AlwaysOMP: 0.63484 ms Auto: 0.69292 ms (AlwaysOMP) NeverOMP: 4.07100 ms [10,3,128,128] lhs=491,520 items (Forward) AlwaysOMP: 0.91142 ms Auto: 1.26824 ms (AlwaysOMP) NeverOMP: 6.51999 ms [20,3,256,256] lhs=3,932,160 items (Forward) Auto: 6.61569 ms (AlwaysOMP) AlwaysOMP: 6.66478 ms NeverOMP: 52.13156 ms [1,1,28,28] lhs=784 items (Backward) Auto: 0.00054 ms (NeverOMP) NeverOMP: 0.00076 ms AlwaysOMP: 0.01402 ms [1,3,28,28] lhs=2,352 items (Backward) NeverOMP: 0.00167 ms Auto: 0.00183 ms (NeverOMP) AlwaysOMP: 0.01174 ms [50,1,18,32] lhs=28,800 items (Backward) NeverOMP: 0.01595 ms Auto: 0.18269 ms (NeverOMP) AlwaysOMP: 0.38251 ms [25,3,64,64] lhs=307,200 items (Backward) AlwaysOMP: 0.08241 ms NeverOMP: 0.17029 ms Auto: 0.22272 ms (NeverOMP) *** WARNING: Wrong OMP state selected *** [10,3,128,128] lhs=491,520 items (Backward) AlwaysOMP: 0.12309 ms NeverOMP: 0.27122 ms Auto: 0.48192 ms (NeverOMP) *** WARNING: Wrong OMP state selected *** [20,3,256,256] lhs=3,932,160 items (Backward) Auto: 0.98750 ms (AlwaysOMP) AlwaysOMP: 1.07169 ms NeverOMP: 2.19638 ms ****************************** Operators: sqrt, for type: unsigned char ****************************** AlwaysOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] Auto Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] NeverOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] [1,1,28,28] lhs=784 items (Forward) NeverOMP: 0.00176 ms Auto: 0.00179 ms (NeverOMP) AlwaysOMP: 0.01462 ms [1,3,28,28] lhs=2,352 items (Forward) Auto: 0.00519 ms (NeverOMP) NeverOMP: 0.00529 ms AlwaysOMP: 0.01111 ms [50,1,18,32] lhs=28,800 items (Forward) NeverOMP: 0.06338 ms Auto: 0.17961 ms (NeverOMP) AlwaysOMP: 0.71542 ms [25,3,64,64] lhs=307,200 items (Forward) AlwaysOMP: 0.13228 ms Auto: 0.18874 ms (AlwaysOMP) NeverOMP: 0.67359 ms [10,3,128,128] lhs=491,520 items (Forward) Auto: 0.21040 ms (AlwaysOMP) AlwaysOMP: 0.24252 ms NeverOMP: 1.07736 ms [20,3,256,256] lhs=3,932,160 items (Forward) AlwaysOMP: 1.38032 ms Auto: 1.58260 ms (AlwaysOMP) NeverOMP: 8.72154 ms [1,1,28,28] lhs=784 items (Backward) NeverOMP: 0.00051 ms Auto: 0.00053 ms (NeverOMP) AlwaysOMP: 0.01421 ms [1,3,28,28] lhs=2,352 items (Backward) Auto: 0.00145 ms (NeverOMP) NeverOMP: 0.00147 ms AlwaysOMP: 0.01196 ms [50,1,18,32] lhs=28,800 items (Backward) NeverOMP: 0.01633 ms Auto: 0.16087 ms (NeverOMP) AlwaysOMP: 0.41953 ms [25,3,64,64] lhs=307,200 items (Backward) Auto: 0.12017 ms (AlwaysOMP) AlwaysOMP: 0.15077 ms NeverOMP: 0.17212 ms [10,3,128,128] lhs=491,520 items (Backward) AlwaysOMP: 0.18389 ms Auto: 0.18422 ms (AlwaysOMP) NeverOMP: 0.27528 ms [20,3,256,256] lhs=3,932,160 items (Backward) AlwaysOMP: 1.35693 ms Auto: 1.38488 ms (AlwaysOMP) NeverOMP: 2.21104 ms ****************************** Operators: elemwise_add, _backward_add for type: unsigned char ****************************** AlwaysOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] Auto Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] NeverOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] [1,1,28,28] lhs=784 items (Forward) Auto: 0.00013 ms (NeverOMP) NeverOMP: 0.00017 ms AlwaysOMP: 0.01204 ms [1,3,28,28] lhs=2,352 items (Forward) Auto: 0.00019 ms (NeverOMP) NeverOMP: 0.00022 ms AlwaysOMP: 0.01162 ms [50,1,18,32] lhs=28,800 items (Forward) NeverOMP: 0.00175 ms Auto: 0.00180 ms (NeverOMP) AlwaysOMP: 0.02564 ms [25,3,64,64] lhs=307,200 items (Forward) NeverOMP: 0.03057 ms Auto: 0.07261 ms (NeverOMP) AlwaysOMP: 0.11983 ms [10,3,128,128] lhs=491,520 items (Forward) NeverOMP: 0.05596 ms Auto: 0.10967 ms (AlwaysOMP) AlwaysOMP: 0.11079 ms *** WARNING: Wrong OMP state selected *** [20,3,256,256] lhs=3,932,160 items (Forward) NeverOMP: 0.53512 ms Auto: 0.73257 ms (NeverOMP) AlwaysOMP: 0.96106 ms [1,1,28,28] lhs=784 items (Backward) Auto: 0.00017 ms (NeverOMP) NeverOMP: 0.00017 ms AlwaysOMP: 0.02444 ms [1,3,28,28] lhs=2,352 items (Backward) Auto: 0.00020 ms (NeverOMP) NeverOMP: 0.00023 ms AlwaysOMP: 0.02157 ms [50,1,18,32] lhs=28,800 items (Backward) NeverOMP: 0.00247 ms Auto: 0.00249 ms (NeverOMP) AlwaysOMP: 0.02519 ms [25,3,64,64] lhs=307,200 items (Backward) NeverOMP: 0.04474 ms AlwaysOMP: 0.08410 ms Auto: 0.09190 ms (AlwaysOMP) *** WARNING: Wrong OMP state selected *** [10,3,128,128] lhs=491,520 items (Backward) NeverOMP: 0.07080 ms AlwaysOMP: 0.12211 ms Auto: 0.12234 ms (AlwaysOMP) *** WARNING: Wrong OMP state selected *** [20,3,256,256] lhs=3,932,160 items (Backward) NeverOMP: 0.63505 ms Auto: 0.78678 ms (NeverOMP) AlwaysOMP: 1.08374 ms ****************************** Operators: elemwise_mul, _backward_mul for type: unsigned char ****************************** AlwaysOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] Auto Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] NeverOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] [1,1,28,28] lhs=784 items (Forward) Auto: 0.00019 ms (NeverOMP) NeverOMP: 0.00019 ms AlwaysOMP: 0.09881 ms [1,3,28,28] lhs=2,352 items (Forward) NeverOMP: 0.00035 ms Auto: 0.00039 ms (NeverOMP) AlwaysOMP: 0.02718 ms [50,1,18,32] lhs=28,800 items (Forward) NeverOMP: 0.00317 ms Auto: 0.00321 ms (NeverOMP) AlwaysOMP: 0.29173 ms [25,3,64,64] lhs=307,200 items (Forward) NeverOMP: 0.04596 ms Auto: 0.07970 ms (AlwaysOMP) AlwaysOMP: 0.10948 ms *** WARNING: Wrong OMP state selected *** [10,3,128,128] lhs=491,520 items (Forward) NeverOMP: 0.07292 ms Auto: 0.11031 ms (AlwaysOMP) AlwaysOMP: 0.11079 ms *** WARNING: Wrong OMP state selected *** [20,3,256,256] lhs=3,932,160 items (Forward) NeverOMP: 0.68165 ms Auto: 0.72266 ms (AlwaysOMP) AlwaysOMP: 0.72718 ms [1,1,28,28] lhs=784 items (Backward) NeverOMP: 0.00023 ms Auto: 0.00026 ms (NeverOMP) AlwaysOMP: 0.16520 ms [1,3,28,28] lhs=2,352 items (Backward) NeverOMP: 0.00056 ms Auto: 0.00057 ms (NeverOMP) AlwaysOMP: 0.02093 ms [50,1,18,32] lhs=28,800 items (Backward) NeverOMP: 0.00616 ms Auto: 0.00618 ms (NeverOMP) AlwaysOMP: 0.34041 ms [25,3,64,64] lhs=307,200 items (Backward) NeverOMP: 0.08047 ms AlwaysOMP: 0.11465 ms Auto: 0.11473 ms (AlwaysOMP) *** WARNING: Wrong OMP state selected *** [10,3,128,128] lhs=491,520 items (Backward) NeverOMP: 0.12707 ms Auto: 0.17122 ms (AlwaysOMP) AlwaysOMP: 0.17214 ms *** WARNING: Wrong OMP state selected *** [20,3,256,256] lhs=3,932,160 items (Backward) NeverOMP: 1.19277 ms Auto: 1.21995 ms (NeverOMP) AlwaysOMP: 1.31694 ms ****************************** Operators: elemwise_div, _backward_div for type: unsigned char ****************************** AlwaysOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] Auto Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] NeverOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] [1,1,28,28] lhs=784 items (Forward) NeverOMP: 0.00200 ms Auto: 0.00207 ms (NeverOMP) AlwaysOMP: 0.01329 ms [1,3,28,28] lhs=2,352 items (Forward) NeverOMP: 0.00582 ms Auto: 0.00594 ms (NeverOMP) AlwaysOMP: 0.01795 ms [50,1,18,32] lhs=28,800 items (Forward) AlwaysOMP: 0.03412 ms Auto: 0.03613 ms (AlwaysOMP) NeverOMP: 0.07194 ms [25,3,64,64] lhs=307,200 items (Forward) Auto: 0.15304 ms (AlwaysOMP) AlwaysOMP: 0.21845 ms NeverOMP: 0.76740 ms [10,3,128,128] lhs=491,520 items (Forward) Auto: 0.22607 ms (AlwaysOMP) AlwaysOMP: 0.22911 ms NeverOMP: 1.22773 ms [20,3,256,256] lhs=3,932,160 items (Forward) Auto: 1.56557 ms (AlwaysOMP) AlwaysOMP: 1.70273 ms NeverOMP: 9.86493 ms [1,1,28,28] lhs=784 items (Backward) NeverOMP: 0.00107 ms Auto: 0.00109 ms (NeverOMP) AlwaysOMP: 0.02995 ms [1,3,28,28] lhs=2,352 items (Backward) NeverOMP: 0.00300 ms Auto: 0.00304 ms (NeverOMP) AlwaysOMP: 0.02194 ms [50,1,18,32] lhs=28,800 items (Backward) AlwaysOMP: 0.03570 ms NeverOMP: 0.03693 ms Auto: 0.03711 ms (NeverOMP) [25,3,64,64] lhs=307,200 items (Backward) AlwaysOMP: 0.23607 ms Auto: 0.23800 ms (AlwaysOMP) NeverOMP: 0.38447 ms [10,3,128,128] lhs=491,520 items (Backward) AlwaysOMP: 0.36417 ms Auto: 0.36613 ms (AlwaysOMP) NeverOMP: 0.61532 ms [20,3,256,256] lhs=3,932,160 items (Backward) Auto: 2.46650 ms (AlwaysOMP) AlwaysOMP: 2.57598 ms NeverOMP: 4.98109 ms Success rate for type unsigned char: 0.86111 [ OK ] OMP_TUNING.EvaluateTuneTestInt8 (2707185 ms) [ RUN ] OMP_TUNING.EvaluateTuneTestInt32 ****************************** Operators: relu, for type: int ****************************** AlwaysOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] Auto Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] NeverOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] [1,1,28,28] lhs=784 items (Forward) Auto: 0.01188 ms (NeverOMP) NeverOMP: 0.01194 ms AlwaysOMP: 0.01271 ms [1,3,28,28] lhs=2,352 items (Forward) AlwaysOMP: 0.01091 ms Auto: 0.01111 ms (NeverOMP) NeverOMP: 0.01127 ms [50,1,18,32] lhs=28,800 items (Forward) Auto: 0.02688 ms (AlwaysOMP) AlwaysOMP: 0.37070 ms NeverOMP: 0.51536 ms [25,3,64,64] lhs=307,200 items (Forward) Auto: 0.06376 ms (AlwaysOMP) AlwaysOMP: 0.06974 ms NeverOMP: 0.07645 ms [10,3,128,128] lhs=491,520 items (Forward) NeverOMP: 0.08585 ms Auto: 0.08596 ms (NeverOMP) AlwaysOMP: 0.08777 ms [20,3,256,256] lhs=3,932,160 items (Forward) Auto: 0.31072 ms (AlwaysOMP) AlwaysOMP: 0.31122 ms NeverOMP: 0.31545 ms [1,1,28,28] lhs=784 items (Backward) NeverOMP: 0.00062 ms Auto: 0.00064 ms (NeverOMP) AlwaysOMP: 0.01187 ms [1,3,28,28] lhs=2,352 items (Backward) Auto: 0.00148 ms (NeverOMP) NeverOMP: 0.00151 ms AlwaysOMP: 0.01085 ms [50,1,18,32] lhs=28,800 items (Backward) Auto: 0.01193 ms (NeverOMP) NeverOMP: 0.01747 ms AlwaysOMP: 0.22900 ms [25,3,64,64] lhs=307,200 items (Backward) Auto: 0.03840 ms (AlwaysOMP) AlwaysOMP: 0.03853 ms NeverOMP: 0.19392 ms [10,3,128,128] lhs=491,520 items (Backward) AlwaysOMP: 0.05553 ms Auto: 0.05567 ms (AlwaysOMP) NeverOMP: 0.29659 ms [20,3,256,256] lhs=3,932,160 items (Backward) Auto: 0.52947 ms (AlwaysOMP) AlwaysOMP: 0.53544 ms NeverOMP: 3.56144 ms ****************************** Operators: sigmoid, for type: int ****************************** AlwaysOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] Auto Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] NeverOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] [1,1,28,28] lhs=784 items (Forward) NeverOMP: 0.00944 ms Auto: 0.00955 ms (NeverOMP) AlwaysOMP: 0.01385 ms [1,3,28,28] lhs=2,352 items (Forward) AlwaysOMP: 0.01280 ms Auto: 0.01568 ms (AlwaysOMP) NeverOMP: 0.03059 ms [50,1,18,32] lhs=28,800 items (Forward) Auto: 0.14174 ms (AlwaysOMP) AlwaysOMP: 0.34073 ms NeverOMP: 0.40348 ms [25,3,64,64] lhs=307,200 items (Forward) Auto: 0.58693 ms (AlwaysOMP) AlwaysOMP: 0.58754 ms NeverOMP: 4.35948 ms [10,3,128,128] lhs=491,520 items (Forward) Auto: 0.91874 ms (AlwaysOMP) AlwaysOMP: 1.02512 ms NeverOMP: 6.96376 ms [20,3,256,256] lhs=3,932,160 items (Forward) AlwaysOMP: 6.35411 ms Auto: 6.55719 ms (AlwaysOMP) NeverOMP: 56.10402 ms [1,1,28,28] lhs=784 items (Backward) Auto: 0.00046 ms (NeverOMP) NeverOMP: 0.00047 ms AlwaysOMP: 0.01252 ms [1,3,28,28] lhs=2,352 items (Backward) NeverOMP: 0.00118 ms Auto: 0.00175 ms (NeverOMP) AlwaysOMP: 0.01106 ms [50,1,18,32] lhs=28,800 items (Backward) NeverOMP: 0.01619 ms Auto: 0.03206 ms (NeverOMP) AlwaysOMP: 0.30180 ms [25,3,64,64] lhs=307,200 items (Backward) Auto: 0.04226 ms (AlwaysOMP) AlwaysOMP: 0.04273 ms NeverOMP: 0.17773 ms [10,3,128,128] lhs=491,520 items (Backward) Auto: 0.06121 ms (AlwaysOMP) AlwaysOMP: 0.13222 ms NeverOMP: 0.28958 ms [20,3,256,256] lhs=3,932,160 items (Backward) AlwaysOMP: 0.51086 ms Auto: 0.72117 ms (AlwaysOMP) NeverOMP: 3.59519 ms ****************************** Operators: sqrt, for type: int ****************************** AlwaysOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] Auto Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] NeverOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] [1,1,28,28] lhs=784 items (Forward) NeverOMP: 0.00370 ms Auto: 0.00386 ms (NeverOMP) AlwaysOMP: 0.01300 ms [1,3,28,28] lhs=2,352 items (Forward) AlwaysOMP: 0.01123 ms Auto: 0.01692 ms (NeverOMP) NeverOMP: 0.01700 ms *** WARNING: Wrong OMP state selected *** [50,1,18,32] lhs=28,800 items (Forward) Auto: 0.29965 ms (NeverOMP) NeverOMP: 0.32824 ms AlwaysOMP: 0.37182 ms [25,3,64,64] lhs=307,200 items (Forward) Auto: 0.42119 ms (AlwaysOMP) AlwaysOMP: 0.42269 ms NeverOMP: 3.56290 ms [10,3,128,128] lhs=491,520 items (Forward) AlwaysOMP: 0.65663 ms Auto: 0.75320 ms (AlwaysOMP) NeverOMP: 5.69756 ms [20,3,256,256] lhs=3,932,160 items (Forward) AlwaysOMP: 4.63959 ms Auto: 4.64424 ms (AlwaysOMP) NeverOMP: 45.85740 ms [1,1,28,28] lhs=784 items (Backward) NeverOMP: 0.00052 ms Auto: 0.00053 ms (NeverOMP) AlwaysOMP: 0.01225 ms [1,3,28,28] lhs=2,352 items (Backward) NeverOMP: 0.00139 ms Auto: 0.00139 ms (NeverOMP) AlwaysOMP: 0.01133 ms [50,1,18,32] lhs=28,800 items (Backward) NeverOMP: 0.01793 ms Auto: 0.22920 ms (AlwaysOMP) AlwaysOMP: 0.33120 ms *** WARNING: Wrong OMP state selected *** [25,3,64,64] lhs=307,200 items (Backward) AlwaysOMP: 0.04648 ms Auto: 0.04753 ms (AlwaysOMP) NeverOMP: 0.19558 ms [10,3,128,128] lhs=491,520 items (Backward) AlwaysOMP: 0.06828 ms Auto: 0.06977 ms (AlwaysOMP) NeverOMP: 0.31788 ms [20,3,256,256] lhs=3,932,160 items (Backward) AlwaysOMP: 0.53011 ms Auto: 0.54268 ms (AlwaysOMP) NeverOMP: 3.66556 ms ****************************** Operators: elemwise_add, _backward_add for type: int ****************************** AlwaysOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] Auto Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] NeverOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] [1,1,28,28] lhs=784 items (Forward) NeverOMP: 0.00019 ms Auto: 0.00023 ms (NeverOMP) AlwaysOMP: 0.01304 ms [1,3,28,28] lhs=2,352 items (Forward) NeverOMP: 0.00047 ms Auto: 0.00049 ms (NeverOMP) AlwaysOMP: 0.01105 ms [50,1,18,32] lhs=28,800 items (Forward) NeverOMP: 0.00953 ms Auto: 0.01018 ms (NeverOMP) AlwaysOMP: 0.35903 ms [25,3,64,64] lhs=307,200 items (Forward) Auto: 0.06004 ms (AlwaysOMP) AlwaysOMP: 0.06424 ms NeverOMP: 0.13648 ms [10,3,128,128] lhs=491,520 items (Forward) Auto: 0.13170 ms (AlwaysOMP) AlwaysOMP: 0.13676 ms NeverOMP: 0.22694 ms [20,3,256,256] lhs=3,932,160 items (Forward) AlwaysOMP: 0.58064 ms Auto: 0.58767 ms (AlwaysOMP) NeverOMP: 3.68696 ms [1,1,28,28] lhs=784 items (Backward) Auto: 0.00023 ms (NeverOMP) NeverOMP: 0.00025 ms AlwaysOMP: 0.02380 ms [1,3,28,28] lhs=2,352 items (Backward) NeverOMP: 0.00061 ms Auto: 0.00063 ms (NeverOMP) AlwaysOMP: 0.02196 ms [50,1,18,32] lhs=28,800 items (Backward) NeverOMP: 0.01312 ms Auto: 0.01404 ms (NeverOMP) AlwaysOMP: 0.45496 ms [25,3,64,64] lhs=307,200 items (Backward) AlwaysOMP: 0.05092 ms Auto: 0.05849 ms (AlwaysOMP) NeverOMP: 0.17346 ms [10,3,128,128] lhs=491,520 items (Backward) AlwaysOMP: 0.06769 ms Auto: 0.06981 ms (AlwaysOMP) NeverOMP: 0.28245 ms [20,3,256,256] lhs=3,932,160 items (Backward) AlwaysOMP: 0.76760 ms Auto: 0.77871 ms (AlwaysOMP) NeverOMP: 4.82562 ms ****************************** Operators: elemwise_mul, _backward_mul for type: int ****************************** AlwaysOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] Auto Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] NeverOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] [1,1,28,28] lhs=784 items (Forward) Auto: 0.00038 ms (NeverOMP) NeverOMP: 0.00049 ms AlwaysOMP: 0.01172 ms [1,3,28,28] lhs=2,352 items (Forward) Auto: 0.00093 ms (NeverOMP) NeverOMP: 0.00098 ms AlwaysOMP: 0.01854 ms [50,1,18,32] lhs=28,800 items (Forward) Auto: 0.01563 ms (NeverOMP) NeverOMP: 0.01591 ms AlwaysOMP: 0.10024 ms [25,3,64,64] lhs=307,200 items (Forward) Auto: 0.18456 ms (NeverOMP) NeverOMP: 0.18718 ms AlwaysOMP: 0.20379 ms [10,3,128,128] lhs=491,520 items (Forward) AlwaysOMP: 0.10292 ms Auto: 0.10390 ms (AlwaysOMP) NeverOMP: 0.31488 ms [20,3,256,256] lhs=3,932,160 items (Forward) Auto: 0.64271 ms (AlwaysOMP) AlwaysOMP: 0.79121 ms NeverOMP: 3.84123 ms [1,1,28,28] lhs=784 items (Backward) Auto: 0.00067 ms (NeverOMP) NeverOMP: 0.00082 ms AlwaysOMP: 0.02452 ms [1,3,28,28] lhs=2,352 items (Backward) Auto: 0.00166 ms (NeverOMP) NeverOMP: 0.00179 ms AlwaysOMP: 0.02130 ms [50,1,18,32] lhs=28,800 items (Backward) Auto: 0.02899 ms (NeverOMP) NeverOMP: 0.02924 ms AlwaysOMP: 0.05003 ms [25,3,64,64] lhs=307,200 items (Backward) AlwaysOMP: 0.07528 ms Auto: 0.22076 ms (NeverOMP) NeverOMP: 0.32319 ms *** WARNING: Wrong OMP state selected *** [10,3,128,128] lhs=491,520 items (Backward) AlwaysOMP: 0.10840 ms Auto: 0.10917 ms (AlwaysOMP) NeverOMP: 0.52678 ms [20,3,256,256] lhs=3,932,160 items (Backward) AlwaysOMP: 3.20825 ms Auto: 3.23265 ms (AlwaysOMP) NeverOMP: 7.12689 ms ****************************** Operators: elemwise_div, _backward_div for type: int ****************************** AlwaysOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] Auto Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] NeverOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] [1,1,28,28] lhs=784 items (Forward) NeverOMP: 0.00203 ms Auto: 0.00207 ms (NeverOMP) AlwaysOMP: 0.01175 ms [1,3,28,28] lhs=2,352 items (Forward) Auto: 0.00598 ms (NeverOMP) NeverOMP: 0.00599 ms AlwaysOMP: 0.01601 ms [50,1,18,32] lhs=28,800 items (Forward) NeverOMP: 0.07468 ms Auto: 0.12859 ms (NeverOMP) AlwaysOMP: 0.32019 ms [25,3,64,64] lhs=307,200 items (Forward) Auto: 0.21234 ms (AlwaysOMP) AlwaysOMP: 0.30844 ms NeverOMP: 0.78625 ms [10,3,128,128] lhs=491,520 items (Forward) AlwaysOMP: 0.25956 ms Auto: 0.26062 ms (AlwaysOMP) NeverOMP: 1.26884 ms [20,3,256,256] lhs=3,932,160 items (Forward) AlwaysOMP: 1.88391 ms Auto: 1.96897 ms (AlwaysOMP) NeverOMP: 10.30144 ms [1,1,28,28] lhs=784 items (Backward) NeverOMP: 0.00097 ms Auto: 0.00102 ms (NeverOMP) AlwaysOMP: 0.02430 ms [1,3,28,28] lhs=2,352 items (Backward) Auto: 0.00293 ms (NeverOMP) NeverOMP: 0.00294 ms AlwaysOMP: 0.02082 ms [50,1,18,32] lhs=28,800 items (Backward) NeverOMP: 0.04071 ms Auto: 0.10969 ms (NeverOMP) AlwaysOMP: 0.52492 ms [25,3,64,64] lhs=307,200 items (Backward) Auto: 0.09188 ms (AlwaysOMP) AlwaysOMP: 0.22019 ms NeverOMP: 0.43284 ms [10,3,128,128] lhs=491,520 items (Backward) AlwaysOMP: 0.13733 ms Auto: 0.13847 ms (AlwaysOMP) NeverOMP: 0.70627 ms [20,3,256,256] lhs=3,932,160 items (Backward) AlwaysOMP: 3.66552 ms Auto: 3.88015 ms (AlwaysOMP) NeverOMP: 8.25349 ms Success rate for type int: 0.95833 [ OK ] OMP_TUNING.EvaluateTuneTestInt32 (2730349 ms) [ RUN ] OMP_TUNING.EvaluateTuneTestInt64 ****************************** Operators: relu, for type: long ****************************** AlwaysOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] Auto Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] NeverOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] [1,1,28,28] lhs=784 items (Forward) AlwaysOMP: 0.01188 ms NeverOMP: 0.01205 ms Auto: 0.17143 ms (NeverOMP) *** WARNING: Wrong OMP state selected *** [1,3,28,28] lhs=2,352 items (Forward) NeverOMP: 0.01109 ms AlwaysOMP: 0.01131 ms Auto: 0.73350 ms (AlwaysOMP) *** WARNING: Wrong OMP state selected *** [50,1,18,32] lhs=28,800 items (Forward) Auto: 0.07400 ms (AlwaysOMP) AlwaysOMP: 0.17480 ms NeverOMP: 0.18969 ms [25,3,64,64] lhs=307,200 items (Forward) AlwaysOMP: 0.10761 ms NeverOMP: 0.11079 ms Auto: 0.26266 ms (NeverOMP) *** WARNING: Wrong OMP state selected *** [10,3,128,128] lhs=491,520 items (Forward) NeverOMP: 0.12822 ms AlwaysOMP: 0.19584 ms Auto: 0.73473 ms (AlwaysOMP) *** WARNING: Wrong OMP state selected *** [20,3,256,256] lhs=3,932,160 items (Forward) Auto: 1.28321 ms (NeverOMP) NeverOMP: 1.35374 ms AlwaysOMP: 1.69886 ms [1,1,28,28] lhs=784 items (Backward) NeverOMP: 0.00097 ms Auto: 0.00104 ms (NeverOMP) AlwaysOMP: 0.01204 ms [1,3,28,28] lhs=2,352 items (Backward) NeverOMP: 0.00266 ms Auto: 0.00310 ms (NeverOMP) AlwaysOMP: 0.01103 ms [50,1,18,32] lhs=28,800 items (Backward) Auto: 0.01458 ms (NeverOMP) NeverOMP: 0.03460 ms AlwaysOMP: 0.14698 ms [25,3,64,64] lhs=307,200 items (Backward) AlwaysOMP: 0.06603 ms Auto: 0.18756 ms (AlwaysOMP) NeverOMP: 0.34289 ms [10,3,128,128] lhs=491,520 items (Backward) AlwaysOMP: 0.16079 ms NeverOMP: 0.53232 ms Auto: 0.54369 ms (NeverOMP) *** WARNING: Wrong OMP state selected *** [20,3,256,256] lhs=3,932,160 items (Backward) Auto: 4.06182 ms (AlwaysOMP) AlwaysOMP: 4.34448 ms NeverOMP: 6.92864 ms ****************************** Operators: sigmoid, for type: long ****************************** AlwaysOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] Auto Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] NeverOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] [1,1,28,28] lhs=784 items (Forward) Auto: 0.00964 ms (NeverOMP) NeverOMP: 0.01200 ms AlwaysOMP: 0.01381 ms [1,3,28,28] lhs=2,352 items (Forward) AlwaysOMP: 0.01392 ms Auto: 0.01500 ms (AlwaysOMP) NeverOMP: 0.03346 ms [50,1,18,32] lhs=28,800 items (Forward) AlwaysOMP: 0.06559 ms Auto: 0.19101 ms (AlwaysOMP) NeverOMP: 0.41206 ms [25,3,64,64] lhs=307,200 items (Forward) AlwaysOMP: 0.61782 ms Auto: 0.69495 ms (AlwaysOMP) NeverOMP: 4.45403 ms [10,3,128,128] lhs=491,520 items (Forward) AlwaysOMP: 0.94409 ms Auto: 1.03096 ms (AlwaysOMP) NeverOMP: 7.13806 ms [20,3,256,256] lhs=3,932,160 items (Forward) Auto: 6.46679 ms (AlwaysOMP) AlwaysOMP: 6.91034 ms NeverOMP: 56.07535 ms [1,1,28,28] lhs=784 items (Backward) Auto: 0.00112 ms (NeverOMP) NeverOMP: 0.00136 ms AlwaysOMP: 0.01302 ms [1,3,28,28] lhs=2,352 items (Backward) NeverOMP: 0.00342 ms Auto: 0.00473 ms (NeverOMP) AlwaysOMP: 0.01180 ms [50,1,18,32] lhs=28,800 items (Backward) AlwaysOMP: 0.01615 ms NeverOMP: 0.03956 ms Auto: 0.05041 ms (NeverOMP) *** WARNING: Wrong OMP state selected *** [25,3,64,64] lhs=307,200 items (Backward) AlwaysOMP: 0.09081 ms Auto: 0.13354 ms (AlwaysOMP) NeverOMP: 0.43253 ms [10,3,128,128] lhs=491,520 items (Backward) AlwaysOMP: 0.13851 ms Auto: 0.18442 ms (AlwaysOMP) NeverOMP: 0.70408 ms [20,3,256,256] lhs=3,932,160 items (Backward) Auto: 4.24534 ms (AlwaysOMP) AlwaysOMP: 4.35450 ms NeverOMP: 7.66533 ms ****************************** Operators: sqrt, for type: long ****************************** AlwaysOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] Auto Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] NeverOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] [1,1,28,28] lhs=784 items (Forward) NeverOMP: 0.00382 ms Auto: 0.00483 ms (NeverOMP) AlwaysOMP: 0.01333 ms [1,3,28,28] lhs=2,352 items (Forward) AlwaysOMP: 0.01191 ms Auto: 0.01314 ms (AlwaysOMP) NeverOMP: 0.01695 ms [50,1,18,32] lhs=28,800 items (Forward) AlwaysOMP: 0.04886 ms Auto: 0.05012 ms (AlwaysOMP) NeverOMP: 0.34158 ms [25,3,64,64] lhs=307,200 items (Forward) Auto: 0.44269 ms (AlwaysOMP) AlwaysOMP: 0.45424 ms NeverOMP: 3.70322 ms [10,3,128,128] lhs=491,520 items (Forward) Auto: 0.66939 ms (AlwaysOMP) AlwaysOMP: 0.67080 ms NeverOMP: 5.94093 ms [20,3,256,256] lhs=3,932,160 items (Forward) AlwaysOMP: 5.08734 ms Auto: 5.59218 ms (AlwaysOMP) NeverOMP: 47.50976 ms [1,1,28,28] lhs=784 items (Backward) NeverOMP: 0.00180 ms Auto: 0.00233 ms (NeverOMP) AlwaysOMP: 0.01261 ms [1,3,28,28] lhs=2,352 items (Backward) NeverOMP: 0.00527 ms Auto: 0.00569 ms (NeverOMP) AlwaysOMP: 0.01129 ms [50,1,18,32] lhs=28,800 items (Backward) Auto: 0.01820 ms (AlwaysOMP) AlwaysOMP: 0.01919 ms NeverOMP: 0.06396 ms [25,3,64,64] lhs=307,200 items (Backward) Auto: 0.12825 ms (AlwaysOMP) AlwaysOMP: 0.14127 ms NeverOMP: 0.68567 ms [10,3,128,128] lhs=491,520 items (Backward) Auto: 0.20047 ms (AlwaysOMP) AlwaysOMP: 0.20080 ms NeverOMP: 1.10115 ms [20,3,256,256] lhs=3,932,160 items (Backward) AlwaysOMP: 4.22520 ms Auto: 4.46675 ms (AlwaysOMP) NeverOMP: 9.52464 ms ****************************** Operators: elemwise_add, _backward_add for type: long ****************************** AlwaysOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] Auto Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] NeverOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] [1,1,28,28] lhs=784 items (Forward) Auto: 0.00033 ms (NeverOMP) NeverOMP: 0.00037 ms AlwaysOMP: 0.44240 ms [1,3,28,28] lhs=2,352 items (Forward) Auto: 0.00110 ms (NeverOMP) NeverOMP: 0.00114 ms AlwaysOMP: 0.02055 ms [50,1,18,32] lhs=28,800 items (Forward) Auto: 0.02656 ms (NeverOMP) NeverOMP: 0.02659 ms AlwaysOMP: 0.62568 ms [25,3,64,64] lhs=307,200 items (Forward) Auto: 0.11231 ms (AlwaysOMP) AlwaysOMP: 0.18678 ms NeverOMP: 0.29245 ms [10,3,128,128] lhs=491,520 items (Forward) Auto: 0.12811 ms (AlwaysOMP) AlwaysOMP: 0.27840 ms NeverOMP: 0.50060 ms [20,3,256,256] lhs=3,932,160 items (Forward) Auto: 3.03432 ms (AlwaysOMP) AlwaysOMP: 3.11511 ms NeverOMP: 7.78684 ms [1,1,28,28] lhs=784 items (Backward) NeverOMP: 0.00041 ms Auto: 0.00042 ms (NeverOMP) AlwaysOMP: 0.68749 ms [1,3,28,28] lhs=2,352 items (Backward) NeverOMP: 0.00156 ms Auto: 0.00158 ms (NeverOMP) AlwaysOMP: 0.02029 ms [50,1,18,32] lhs=28,800 items (Backward) Auto: 0.03375 ms (NeverOMP) NeverOMP: 0.03415 ms AlwaysOMP: 0.80831 ms [25,3,64,64] lhs=307,200 items (Backward) Auto: 0.08156 ms (AlwaysOMP) AlwaysOMP: 0.15433 ms NeverOMP: 0.36800 ms [10,3,128,128] lhs=491,520 items (Backward) Auto: 0.12559 ms (AlwaysOMP) AlwaysOMP: 0.32935 ms NeverOMP: 0.62694 ms [20,3,256,256] lhs=3,932,160 items (Backward) AlwaysOMP: 5.61351 ms Auto: 5.77356 ms (AlwaysOMP) NeverOMP: 10.16545 ms ****************************** Operators: elemwise_mul, _backward_mul for type: long ****************************** AlwaysOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] Auto Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] NeverOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] [1,1,28,28] lhs=784 items (Forward) NeverOMP: 0.00088 ms Auto: 0.00089 ms (NeverOMP) AlwaysOMP: 0.01266 ms [1,3,28,28] lhs=2,352 items (Forward) NeverOMP: 0.00249 ms Auto: 0.00252 ms (NeverOMP) AlwaysOMP: 0.01679 ms [50,1,18,32] lhs=28,800 items (Forward) Auto: 0.03292 ms (NeverOMP) NeverOMP: 0.03339 ms AlwaysOMP: 0.12523 ms [25,3,64,64] lhs=307,200 items (Forward) AlwaysOMP: 0.12583 ms Auto: 0.26984 ms (AlwaysOMP) NeverOMP: 0.41855 ms [10,3,128,128] lhs=491,520 items (Forward) AlwaysOMP: 0.15543 ms Auto: 0.15921 ms (AlwaysOMP) NeverOMP: 0.71696 ms [20,3,256,256] lhs=3,932,160 items (Forward) Auto: 3.62072 ms (AlwaysOMP) AlwaysOMP: 4.55568 ms NeverOMP: 7.95474 ms [1,1,28,28] lhs=784 items (Backward) NeverOMP: 0.00157 ms Auto: 0.00160 ms (NeverOMP) AlwaysOMP: 0.02596 ms [1,3,28,28] lhs=2,352 items (Backward) Auto: 0.00490 ms (NeverOMP) NeverOMP: 0.00491 ms AlwaysOMP: 0.02228 ms [50,1,18,32] lhs=28,800 items (Backward) Auto: 0.06506 ms (NeverOMP) NeverOMP: 0.06652 ms AlwaysOMP: 0.16738 ms [25,3,64,64] lhs=307,200 items (Backward) AlwaysOMP: 0.14737 ms Auto: 0.19925 ms (AlwaysOMP) NeverOMP: 0.72743 ms [10,3,128,128] lhs=491,520 items (Backward) AlwaysOMP: 0.22310 ms Auto: 0.52883 ms (AlwaysOMP) NeverOMP: 1.25899 ms [20,3,256,256] lhs=3,932,160 items (Backward) Auto: 8.83143 ms (AlwaysOMP) AlwaysOMP: 8.93563 ms NeverOMP: 14.23877 ms ****************************** Operators: elemwise_div, _backward_div for type: long ****************************** AlwaysOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] Auto Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] NeverOMP Timing: 50 iterations of 10 calls, shape = [1,1,28,28] Timing: 50 iterations of 10 calls, shape = [1,3,28,28] Timing: 50 iterations of 10 calls, shape = [50,1,18,32] Timing: 50 iterations of 10 calls, shape = [25,3,64,64] Timing: 50 iterations of 10 calls, shape = [10,3,128,128] Timing: 50 iterations of 10 calls, shape = [20,3,256,256] [1,1,28,28] lhs=784 items (Forward) Auto: 0.00631 ms (NeverOMP) NeverOMP: 0.00634 ms AlwaysOMP: 0.01287 ms [1,3,28,28] lhs=2,352 items (Forward) Auto: 0.01576 ms (AlwaysOMP) AlwaysOMP: 0.01657 ms NeverOMP: 0.01887 ms [50,1,18,32] lhs=28,800 items (Forward) NeverOMP: 0.23000 ms AlwaysOMP: 0.65993 ms Auto: 1.38577 ms (AlwaysOMP) *** WARNING: Wrong OMP state selected *** [25,3,64,64] lhs=307,200 items (Forward) Auto: 0.38266 ms (AlwaysOMP) AlwaysOMP: 0.43081 ms NeverOMP: 2.52711 ms [10,3,128,128] lhs=491,520 items (Forward) Auto: 0.58278 ms (AlwaysOMP) AlwaysOMP: 0.60627 ms NeverOMP: 4.07265 ms [20,3,256,256] lhs=3,932,160 items (Forward) AlwaysOMP: 5.95308 ms Auto: 5.97518 ms (AlwaysOMP) NeverOMP: 33.05736 ms [1,1,28,28] lhs=784 items (Backward) Auto: 0.00353 ms (NeverOMP) NeverOMP: 0.00356 ms AlwaysOMP: 0.02492 ms [1,3,28,28] lhs=2,352 items (Backward) NeverOMP: 0.01046 ms Auto: 0.01244 ms (NeverOMP) AlwaysOMP: 0.02171 ms [50,1,18,32] lhs=28,800 items (Backward) NeverOMP: 0.12701 ms AlwaysOMP: 0.99943 ms Auto: 2.48106 ms (AlwaysOMP) *** WARNING: Wrong OMP state selected *** [25,3,64,64] lhs=307,200 items (Backward) Auto: 0.25094 ms (AlwaysOMP) AlwaysOMP: 0.25353 ms NeverOMP: 1.37647 ms [10,3,128,128] lhs=491,520 items (Backward) AlwaysOMP: 0.39175 ms Auto: 0.39267 ms (AlwaysOMP) NeverOMP: 2.24820 ms [20,3,256,256] lhs=3,932,160 items (Backward) Auto: 10.82176 ms (AlwaysOMP) AlwaysOMP: 11.09119 ms NeverOMP: 21.96863 ms Success rate for type long: 0.88889 [ OK ] OMP_TUNING.EvaluateTuneTestInt64 (2864656 ms) [----------] 8 tests from OMP_TUNING (17245981 ms total) [----------] Global test environment tear-down [==========] 8 tests from 1 test case ran. (17245981 ms total) [ PASSED ] 8 tests.