This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

[v1.7.x] Backport #17885 (PR #18128)

Merged 1 commit on Apr 23, 2020.
18 changes: 17 additions & 1 deletion CMakeLists.txt
@@ -733,18 +733,34 @@ endif()

# extension libraries (custom operators, custom subgraphs) are built by default
add_library(customop_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op/gemm_lib.cc)
add_library(transposecsr_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op/transposecsr_lib.cc)
add_library(transposerowsp_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op/transposerowsp_lib.cc)
add_library(subgraph_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_subgraph/subgraph_lib.cc)
add_library(pass_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_pass/pass_lib.cc)
target_include_directories(customop_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet)
target_include_directories(transposecsr_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet)
target_include_directories(transposerowsp_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet)
target_include_directories(subgraph_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet)
target_include_directories(pass_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet)
if(USE_CUDA)
add_library(customop_gpu_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op/relu_lib.cu)
target_include_directories(customop_gpu_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet)
endif()
if(UNIX)
if (USE_CUDA)
target_compile_options(customop_gpu_lib PUBLIC -shared)
endif()
elseif(MSVC)
target_compile_options(customop_lib PUBLIC /LD)
target_compile_options(transposecsr_lib PUBLIC /LD)
target_compile_options(transposerowsp_lib PUBLIC /LD)
target_compile_options(subgraph_lib PUBLIC /LD)
target_compile_options(pass_lib PUBLIC /LD)
set_target_properties(customop_lib PROPERTIES PREFIX "lib")
set_target_properties(transposecsr_lib PROPERTIES PREFIX "lib")
set_target_properties(transposerowsp_lib PROPERTIES PREFIX "lib")
set_target_properties(subgraph_lib PROPERTIES PREFIX "lib")
set_target_properties(pass_lib PROPERTIES PREFIX "lib")
if(USE_CUDA)
target_compile_options(customop_gpu_lib PUBLIC "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-fPIC>")
set_target_properties(customop_gpu_lib PROPERTIES PREFIX "lib")
17 changes: 16 additions & 1 deletion Makefile
@@ -667,7 +667,7 @@ pylint:
python3 -m pylint --rcfile=$(ROOTDIR)/ci/other/pylintrc --ignore-patterns=".*\.so$$,.*\.dll$$,.*\.dylib$$" python/mxnet

# MXNet extension dynamically loading libraries
EXT_LIBS = build/libcustomop_lib.so build/libtransposecsr_lib.so build/libtransposerowsp_lib.so build/libsubgraph_lib.so build/libpass_lib.so
ifeq ($(USE_CUDA), 1)
EXT_LIBS += build/libcustomop_gpu_lib.so
endif
@@ -682,6 +682,21 @@ build/libcustomop_gpu_lib.so:
build/libsubgraph_lib.so:
@mkdir -p $(@D)
$(CXX) -shared -fPIC -std=c++11 example/extensions/lib_subgraph/subgraph_lib.cc -o $@ -I include/mxnet
build/libtransposecsr_lib.so:
@mkdir -p $(@D)
$(CXX) -shared -fPIC -std=c++11 example/extensions/lib_custom_op/transposecsr_lib.cc -o $@ -I include/mxnet
build/libtransposerowsp_lib.so:
@mkdir -p $(@D)
$(CXX) -shared -fPIC -std=c++11 example/extensions/lib_custom_op/transposerowsp_lib.cc -o $@ -I include/mxnet
build/libcustomop_gpu_lib.so:
@mkdir -p $(@D)
$(NVCC) -shared -std=c++11 -Xcompiler -fPIC example/extensions/lib_custom_op/relu_lib.cu -o $@ -I include/mxnet
build/libsubgraph_lib.so:
@mkdir -p $(@D)
$(CXX) -shared -fPIC -std=c++11 example/extensions/lib_subgraph/subgraph_lib.cc -o $@ -I include/mxnet
build/libpass_lib.so:
@mkdir -p $(@D)
$(CXX) -shared -fPIC -std=c++11 example/extensions/lib_pass/pass_lib.cc -o $@ -I include/mxnet

# Cython build
cython:
2 changes: 1 addition & 1 deletion example/extensions/lib_api/init_lib.cc
@@ -27,7 +27,7 @@
#include "lib_api.h"

MXReturnValue initialize(int version) {
if (version >= 10700) {
std::cout << "MXNet version " << version << " supported" << std::endl;
return MX_SUCCESS;
} else {
10 changes: 10 additions & 0 deletions example/extensions/lib_api/test_loading.py
@@ -25,9 +25,19 @@
import mxnet as mx
import os

# test loading library
if (os.name=='posix'):
path = os.path.abspath('libinit_lib.so')
mx.library.load(path)
elif (os.name=='nt'):
path = os.path.abspath('libinit_lib.dll')
mx.library.load(path)

# test loading library with verbose=False
if (os.name=='posix'):
path = os.path.abspath('libinit_lib.so')
mx.library.load(path, False)
elif (os.name=='nt'):
path = os.path.abspath('libinit_lib.dll')
mx.library.load(path, False)

57 changes: 34 additions & 23 deletions example/extensions/lib_custom_op/README.md
@@ -22,15 +22,13 @@ C++ Custom Operator Example and Tutorial

Adding new operators in MXNet requires understanding MXNet's backend operator registration and recompiling MXNet with all its dependencies. Users can add new operators with the old Python custom operator mechanism, but it is slow, complicated, and has a poor adoption rate. So our approach for adding custom operators is to enable dynamic loading, at runtime, of C++ custom operators compiled into external libraries.

Custom operators (CustomOp) enable users to write new operators without compiling against all of the MXNet header files and dependencies. When a library containing custom operators is loaded dynamically, the operators found in the library will be registered in MXNet so that users can call those operators natively just like other built-in operators.

## Getting Started

### Have MXNet Ready

To run the following example, the build type of MXNet doesn’t matter, since the custom operator doesn’t interact with the execution of other native MXNet operators.
Note that if you want to run the GPU examples and write custom operators that run on the GPU, you still need an MXNet CUDA build.

### Run An Example
@@ -117,8 +115,7 @@
There are several required building blocks for making a custom operator:

```c++
MXReturnValue parseAttrs(
    const std::unordered_map<std::string, std::string>& attrs,
    int* num_in,
    int* num_out)
```
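
For illustration, a minimal `parseAttrs` (the attribute-parsing building block above) might look like the following sketch; the fixed two-input, one-output counts are assumptions, not part of the actual gemm_lib.cc code:

```c++
// A sketch, using types from lib_api.h: operator with fixed input/output counts
MXReturnValue parseAttrs(
    const std::unordered_map<std::string, std::string>& attrs,
    int* num_in,
    int* num_out) {
  *num_in = 2;   // e.g. two input matrices
  *num_out = 1;  // one output matrix
  return MX_SUCCESS;
}
```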
@@ -129,30 +126,30 @@

```c++
MXReturnValue inferType(
    const std::unordered_map<std::string, std::string>& attrs,
    std::vector<int>* intypes,
    std::vector<int>* outtypes)
```
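
A sketch for a one-input, one-output operator whose output data type simply mirrors the input:

```c++
// A sketch: output dtype mirrors input dtype
MXReturnValue inferType(
    const std::unordered_map<std::string, std::string>& attrs,
    std::vector<int>* intypes,
    std::vector<int>* outtypes) {
  outtypes->at(0) = intypes->at(0);
  return MX_SUCCESS;
}
```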

* [inferShape](./gemm_lib.cc#L143):
* This function specifies how the custom operator infers output tensor shape using input shape.

```c++
MXReturnValue inferShape(
    const std::unordered_map<std::string, std::string>& attrs,
    std::vector<std::vector<unsigned int>>* inshapes,
    std::vector<std::vector<unsigned int>>* outshapes)
```
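
A sketch that simply passes the input shape through to the output:

```c++
// A sketch: output shape equals input shape
MXReturnValue inferShape(
    const std::unordered_map<std::string, std::string>& attrs,
    std::vector<std::vector<unsigned int>>* inshapes,
    std::vector<std::vector<unsigned int>>* outshapes) {
  outshapes->at(0) = inshapes->at(0);
  return MX_SUCCESS;
}
```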

* [forward](./gemm_lib.cc#L56):
* This function specifies the computation of the forward pass of the operator.

```c++
MXReturnValue forward(
    const std::unordered_map<std::string, std::string>& attrs,
    std::vector<MXTensor>* inputs,
    std::vector<MXTensor>* outputs,
    const OpResource& res)
```
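
A sketch of an element-wise identity forward pass; the `data<float>()` and `size()` accessors follow the usage in the gemm_lib.cc example:

```c++
// A sketch: element-wise copy from input to output
MXReturnValue forward(
    const std::unordered_map<std::string, std::string>& attrs,
    std::vector<MXTensor>* inputs,
    std::vector<MXTensor>* outputs,
    const OpResource& res) {
  float* in  = inputs->at(0).data<float>();
  float* out = outputs->at(0).data<float>();
  for (int64_t i = 0; i < inputs->at(0).size(); i++)
    out[i] = in[i];
  return MX_SUCCESS;
}
```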

Also there are some optional functions you can specify:
@@ -162,19 +159,30 @@

```c++
MXReturnValue backward(
    const std::unordered_map<std::string, std::string>& attrs,
    std::vector<MXTensor>* inputs,
    std::vector<MXTensor>* outputs,
    const OpResource& res)
```
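
A sketch of `backward` for an identity-like operator, assuming (as in the gemm_lib.cc example) that the first input is the gradient arriving from the next layer:

```c++
// A sketch: identity gradient; inputs->at(0) is assumed to be the incoming gradient
MXReturnValue backward(
    const std::unordered_map<std::string, std::string>& attrs,
    std::vector<MXTensor>* inputs,
    std::vector<MXTensor>* outputs,
    const OpResource& res) {
  float* out_grad = inputs->at(0).data<float>();
  float* in_grad  = outputs->at(0).data<float>();
  for (int64_t i = 0; i < inputs->at(0).size(); i++)
    in_grad[i] = out_grad[i];  // d(identity)/dx = 1
  return MX_SUCCESS;
}
```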

* [inferSType](./transposecsr_lib.cc#L168) - Storage Type Inference:
* This function specifies how the custom operator infers storage types for inputs and outputs.

```c++
MXReturnValue inferSType(
    const std::unordered_map<std::string, std::string>& attrs,
    std::vector<int>* instypes,
    std::vector<int>* outstypes)
```
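
A sketch where the storage type passes through unchanged (dense stays dense, CSR stays CSR):

```c++
// A sketch: output storage type mirrors input storage type
MXReturnValue inferSType(
    const std::unordered_map<std::string, std::string>& attrs,
    std::vector<int>* instypes,
    std::vector<int>* outstypes) {
  outstypes->at(0) = instypes->at(0);
  return MX_SUCCESS;
}
```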

* [mutateInputs](./gemm_lib.cc#L214) - Specify mutable input:
* This function allows you to mark some inputs to be mutable inputs. It is useful when using aux parameters for BatchNorm-like operators.

```c++
MXReturnValue mutateInputs(
    const std::unordered_map<std::string, std::string>& attrs,
    std::vector<int>* input_indices)
```
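
A sketch that marks one input as mutable; the index used here is a hypothetical example:

```c++
// A sketch: mark the second input (index 1) as mutable, e.g. an aux state tensor
MXReturnValue mutateInputs(
    const std::unordered_map<std::string, std::string>& attrs,
    std::vector<int>* input_indices) {
  input_indices->push_back(1);  // hypothetical index of an in-place input
  return MX_SUCCESS;
}
```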

After specifying those functions, register the custom operator with MXNet:
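
The registration block itself is collapsed in this diff view; based on the gemm_lib.cc example it looks roughly like the following, where `my_op` and the particular setters are placeholders:

```c++
// Roughly what the collapsed registration block contains; "my_op" is a placeholder name
REGISTER_OP(my_op)
.setParseAttrs(parseAttrs)
.setInferType(inferType)
.setInferShape(inferShape)
.setForward(forward, "cpu");
```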
@@ -200,6 +208,9 @@
If the number of input and output tensors is fixed, you can use hard-coded numbers.
* **inferType**: This function takes three arguments. The 1st argument is the attributes (same as above). The 2nd argument is a list of input data types corresponding to the input tensors. The 3rd argument is the placeholder for output tensor data types you need to assign.
For example, if this operator has one input and one output, and data type doesn’t change, then you can do `outtypes[0] = intypes[0]` to populate the data type.

* **inferSType**: This function takes three arguments. The 1st argument is the attributes (same as above). The 2nd argument is a list of input storage types corresponding to the input tensors. The 3rd argument is the placeholder for output storage types you need to assign.
For example, if this operator has one input and one output, and the storage type doesn’t change, then you can do `outstypes[0] = instypes[0]` to populate the storage type.

* **inferShape**: This function is similar to the `inferType` function, except it is used for populating the output data shapes. You need to figure out the shape of each output tensor for this computation.
For example, if the inputs are images with shape (224,224,3) and you write a padding operator to make 10px borders for the images, then your output shape will be (234,234,3).

@@ -285,7 +296,7 @@
As a result, you don’t need to call `cudaMemcpy` to move the tensor data to the GPU.
Note that the `cuda_stream` object used for launching kernels is passed from MXNet backend via `OpResource` object. See below for details of `Operator Resource`.
Note that the `cuda_stream` object used for launching kernels is passed from the MXNet backend via the `OpResource` object. See below for details of `Operator Resource`. If you plan to create a custom GPU operator, you need to compile `lib_api.h` with `nvcc` to enable the GPU support in the APIs.
Also, `in_data` and `out_data` are pointers to the tensor data allocated on the GPU, so you can pass them directly to your CUDA kernel.

At this point all the attribute functions for each operator (`parseAttrs`, `inferShape`, etc.) run on the CPU, including the `forwardGPU` function. The only part that will actually run on the GPU is the launched CUDA kernel function.
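
For reference, a skeleton of such a GPU forward function, modeled loosely on relu_lib.cu; the kernel body, names, and launch configuration are illustrative assumptions, while `mx_stream_t` and `res.get_cuda_stream()` follow the usage in that example:

```c++
// Illustrative kernel; the real example is relu_lib.cu
__global__ void my_kernel(float* out, const float* in, int64_t N) {
  int64_t tid = blockIdx.x * blockDim.x + threadIdx.x;
  if (tid < N)
    out[tid] = in[tid];  // placeholder element-wise computation
}

MXReturnValue forwardGPU(
    const std::unordered_map<std::string, std::string>& attrs,
    std::vector<MXTensor>* inputs,
    std::vector<MXTensor>* outputs,
    const OpResource& res) {
  const float* in_data = inputs->at(0).data<float>();
  float* out_data = outputs->at(0).data<float>();
  // CUDA stream handed over by the MXNet backend, as described above
  mx_stream_t cuda_stream = res.get_cuda_stream();
  int64_t N = inputs->at(0).size();
  int block = 256;
  int grid = static_cast<int>((N + block - 1) / block);
  my_kernel<<<grid, block, 0, cuda_stream>>>(out_data, in_data, N);
  return MX_SUCCESS;
}
```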