
Commit

fix_elemwise_add
KexinFeng committed Jun 23, 2021
1 parent da4ff3a, commit 6c2f76f
Showing 2 changed files with 2 additions and 2 deletions.
3rdparty/onednn (2 changes: 1 addition & 1 deletion)
Submodule onednn updated 74 files
+1 −1 CMakeLists.txt
+0 −10 cmake/platform.cmake
+0 −1 src/common/primitive_cache.hpp
+19 −0 src/cpu/CMakeLists.txt
+0 −4 src/cpu/matmul/gemm_based_common.hpp
+25 −21 src/cpu/matmul/gemm_bf16_matmul.cpp
+19 −49 src/cpu/matmul/gemm_f32_matmul.cpp
+25 −27 src/cpu/matmul/gemm_x8s8s32x_matmul.cpp
+2 −25 src/cpu/reorder/cpu_reorder_comp_bf16_s8.cpp
+1 −24 src/cpu/reorder/cpu_reorder_comp_f32_s8.cpp
+2 −25 src/cpu/reorder/cpu_reorder_comp_s8_s8.cpp
+18 −40 src/cpu/reorder/simple_reorder.hpp
+16 −0 src/cpu/x64/CMakeLists.txt
+5 −8 src/cpu/x64/jit_avx512_core_x8s8s32x_convolution.cpp
+1 −0 src/cpu/x64/jit_gemm_inner_product_utils.cpp
+0 −25 src/cpu/x64/jit_generator.hpp
+1 −7 src/cpu/x64/jit_uni_pooling.cpp
+162 −227 src/cpu/x64/jit_uni_reorder.cpp
+1 −10 src/cpu/x64/jit_uni_reorder.hpp
+18 −105 src/cpu/x64/jit_uni_reorder_utils.cpp
+5 −9 src/cpu/x64/jit_uni_x8s8s32x_convolution.cpp
+2 −2 src/gpu/compute/compute_engine.hpp
+4 −4 src/gpu/compute/device_info.cpp
+3 −3 src/gpu/compute/device_info.hpp
+7 −7 src/gpu/gpu_impl_list.cpp
+3 −3 src/gpu/jit/binary_format.cpp
+30 −30 src/gpu/jit/gemm/gemm_recipes.hpp
+1 −1 src/gpu/jit/gemm/gen_gemm.hpp
+33 −33 src/gpu/jit/gemm/gen_gemm_kernel.cpp
+2 −2 src/gpu/jit/gemm/gen_gemm_kernel.hpp
+34 −32 src/gpu/jit/gemm/gen_gemm_kernel_generator.cpp
+2 −2 src/gpu/jit/gemm/gen_gemm_kernel_generator.hpp
+2 −2 src/gpu/jit/jit_generator.hpp
+125 −125 src/gpu/jit/ngen/ngen.hpp
+65 −65 src/gpu/jit/ngen/ngen_asm.hpp
+8 −9 src/gpu/jit/ngen/ngen_auto_swsb.hpp
+29 −29 src/gpu/jit/ngen/ngen_core.hpp
+5 −5 src/gpu/jit/ngen/ngen_gen12.hpp
+2 −2 src/gpu/jit/ngen/ngen_pseudo.hpp
+5 −5 src/gpu/jit/ngen/ngen_register_allocator.cpp
+2 −2 src/gpu/jit/ngen/ngen_register_allocator.hpp
+2 −2 src/gpu/jit/ngen/ngen_register_decl.hpp
+2 −2 src/gpu/jit/ngen/npack/neo_packager.hpp
+2 −2 src/gpu/jit/ngen/npack/neo_structs.hpp
+15 −15 src/gpu/ocl/gemm/gen12lp_gemm.cpp
+10 −10 src/gpu/ocl/gemm/gen12lp_gemm.hpp
+6 −6 src/gpu/ocl/gemm/gen12lp_gemm_kernel.hpp
+2 −2 src/gpu/ocl/gemm/gen12lp_gemm_nocopy_scale_x8x8s32.cl
+5 −5 src/gpu/ocl/gemm/gen12lp_gemm_nocopy_x8x8s32.cl
+1 −1 src/gpu/ocl/gen12lp_1x1_conv_fwd_data_x8s8x.cl
+0 −0 src/gpu/ocl/gen12lp_conv_bwd_data_mb_block_x8s8x8.cl
+0 −0 src/gpu/ocl/gen12lp_conv_bwd_data_x8s8x8.cl
+0 −0 src/gpu/ocl/gen12lp_conv_dw_fwd_data_mb_block_x8s8x.cl
+0 −0 src/gpu/ocl/gen12lp_conv_dw_fwd_data_ow_block_x8s8x.cl
+0 −0 src/gpu/ocl/gen12lp_conv_fwd_data_first_x8s8x.cl
+0 −0 src/gpu/ocl/gen12lp_conv_fwd_data_mb_block_x8s8x.cl
+0 −0 src/gpu/ocl/gen12lp_conv_fwd_data_ow_block_x8s8x.cl
+0 −0 src/gpu/ocl/gen12lp_conv_nhwc_fwd_dw_mb_block_x8s8x.cl
+0 −0 src/gpu/ocl/gen12lp_conv_nhwc_fwd_dw_ow_block_x8s8x.cl
+0 −0 src/gpu/ocl/gen12lp_conv_nhwc_fwd_first_x8s8x.cl
+0 −0 src/gpu/ocl/gen12lp_conv_nhwc_fwd_x8s8x.cl
+1 −1 src/gpu/ocl/gen12lp_nhwc_1x1_conv_fwd_x8s8x.cl
+6 −5 src/gpu/ocl/gen12lp_x8s8x_1x1_convolution.cpp
+10 −8 src/gpu/ocl/gen12lp_x8s8x_1x1_convolution.hpp
+2 −2 src/gpu/ocl/gen12lp_x8s8x_compensation.cl
+8 −8 src/gpu/ocl/gen12lp_x8s8x_convolution.cpp
+10 −9 src/gpu/ocl/gen12lp_x8s8x_convolution.hpp
+4 −4 src/gpu/ocl/ocl_gpu_detect.cpp
+2 −2 src/gpu/ocl/ref_prelu.cl
+17 −0 tests/CMakeLists.txt
+0 −0 tests/benchdnn/inputs/conv/perf_conv_gen12lp
+0 −0 tests/benchdnn/inputs/ip/perf_ip_gen12lp
+1 −1 tests/benchdnn/inputs/reorder/harness_reorder_compensation
+0 −0 tests/benchdnn/inputs/rnn/perf_rnn_gen12lp
src/operator/tensor/elemwise_binary_op_basic.cc (2 changes: 1 addition & 1 deletion)
@@ -108,7 +108,7 @@ The storage type of ``elemwise_add`` output depends on storage types of inputs
- otherwise, ``elemwise_add`` generates output with default storage
)code")
- .set_attr<nnvm::FGradient>("FGradient", CloneGradient{"_backward_add"});
+ .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{"_backward_add"});

// specialized gradient add function to do add to optimization
// this must differ from elemwise_add to prevent add to optimization in forward pass.
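
For context, a minimal, hypothetical sketch of how the two gradient attributes in the diff above are typically registered. It assumes the code sits inside an MXNet operator .cc file (within namespace mxnet::op) where NNVM_REGISTER_OP, CloneGradient, and ElemwiseGradUseNone are in scope; the operator name my_elemwise_add_example is invented for illustration, and only CloneGradient, ElemwiseGradUseNone, and _backward_add come from the diff itself.

// Illustrative sketch only, not part of this commit.
// Assumption: this lives in an MXNet operator .cc file, inside namespace
// mxnet::op, so the registration macro and the two gradient helpers resolve.
NNVM_REGISTER_OP(my_elemwise_add_example)   // hypothetical operator name
.set_num_inputs(2)
.set_num_outputs(1)
// Removed line: CloneGradient builds each input gradient by reusing (cloning)
// the incoming output-gradient entry directly, without inserting a separate
// backward node into the graph.
//   .set_attr<nnvm::FGradient>("FGradient", CloneGradient{"_backward_add"})
// Added line: ElemwiseGradUseNone inserts a _backward_add node that depends
// only on the output gradient (it does not retain the forward inputs or
// outputs), and that node computes the input gradients.
.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{"_backward_add"});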
