diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 85720e69fb3d..5279dd24a716 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -1,42 +1,52 @@ -For bugs or installation issues, please provide the following information. -The more information you provide, the more likely people will be able to help you. +Note: Providing complete information in the most concise form is the best way to get help. This issue template serves as the checklist for essential information to most of the technical issues and bug reports. For non-technical issues and feature requests, feel free to present the information in what you believe is the best form. -## Environment info -Operating System: +For Q & A and discussion, please start a discussion thread at https://discuss.mxnet.io -Compiler: +## Description +(Brief description of the problem in no more than 2 sentences.) -Package used (Python/R/Scala/Julia): +## Environment info (Required) + +``` +What to do: +1. Download the diagnosis script from https://raw.githubusercontent.com/apache/incubator-mxnet/master/tools/diagnose.py +2. Run the script using `python diagnose.py` and paste its output here. -MXNet version: +``` + +Package used (Python/R/Scala/Julia): +(I'm using ...) -Or if installed from source: +For Scala user, please provide: +1. Java version: (`java -version`) +2. Maven version: (`mvn -version`) +3. Scala runtime if applicable: (`scala -version`) -MXNet commit hash (`git rev-parse HEAD`): +For R user, please provide R `sessionInfo()`: -If you are using python package, please provide +## Build info (Required if built from source) -Python version and distribution: +Compiler (gcc/clang/mingw/visual studio): -If you are using R package, please provide +MXNet commit hash: +(Paste the output of `git rev-parse HEAD` here.) -R `sessionInfo()`: +Build config: +(Paste the content of config.mk, or the build command.) ## Error Message: -Please paste the full error message, including stack trace. 
+(Paste the complete error message, including stack trace.) ## Minimum reproducible example -if you are using your own code, please provide a short script that reproduces the error. +(If you are using your own code, please provide a short script that reproduces the error. Otherwise, please provide link to the existing example.) ## Steps to reproduce -or if you are running standard examples, please provide the commands you have run that lead to the error. +(Paste the commands you ran that produced the error.) 1. 2. -3. ## What have you tried to solve it? 1. 2. -3. diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 000000000000..468be298b8bd --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,18 @@ +## Description ## +(Brief description on what this PR is about) + +## Checklist ## +### Essentials ### +- [ ] Passed code style checking (`make lint`) +- [ ] Changes are complete (i.e. I finished coding on this PR) +- [ ] All changes have test coverage +- [ ] For user-facing API changes, API doc string has been updated. For new C++ functions in header files, their functionalities and arguments are well-documented. +- [ ] To my best knowledge, examples are either not affected by this change, or have been fixed to be compatible with this change + +### Changes ### +- [ ] Feature1, tests, (and when applicable, API doc) +- [ ] Feature2, tests, (and when applicable, API doc) + +## Comments ## +- If this change is a backward incompatible change, why must this change be made. 
+- Interesting edge cases to note here diff --git a/.gitignore b/.gitignore index 82d2e560237d..fbd62c9ec552 100644 --- a/.gitignore +++ b/.gitignore @@ -60,6 +60,7 @@ tracker __pycache__ *.pkl *.params +*.states *.json *.d build @@ -146,3 +147,10 @@ bld target bin/im2rec + +model/ + +# generated function signature for IDE auto-complete +python/mxnet/symbol/gen_* +python/mxnet/ndarray/gen_* +python/.eggs diff --git a/CMakeLists.txt b/CMakeLists.txt index dc9ca5f7bb0c..b6bb81418231 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,31 +1,63 @@ cmake_minimum_required(VERSION 3.0.2) -project(mxnet C CXX) +if((${CMAKE_VERSION} VERSION_GREATER "3.9.0") OR (${CMAKE_VERSION} VERSION_EQUAL "3.9.0")) + set(FIRST_CUDA TRUE) +else() + set(FIRST_CUDA FALSE) +endif() +include(cmake/Utils.cmake) -if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/build/private/local_config.cmake) - include(${CMAKE_CURRENT_SOURCE_DIR}/build/private/local_config.cmake) +#Some things have order. This must be put in front alone +mxnet_option(USE_CUDA "Build with CUDA support" ON) +mxnet_option(USE_OLDCMAKECUDA "Build with old cmake cuda" OFF) +if(USE_CUDA) + add_definitions(-DMSHADOW_USE_CUDA=1) + IF(FIRST_CUDA AND (NOT USE_OLDCMAKECUDA)) + set(__cuda_toolset "7.5" "8.0" "9.0") + set(CUDA_TOOLSET "8.0" CACHE STRING "Select CUDA Version.") + set_property( CACHE CUDA_TOOLSET PROPERTY STRINGS "" ${__cuda_toolset} ) + set(CMAKE_GENERATOR_TOOLSET "cuda=${CUDA_TOOLSET},host=x64") + project(mxnet C CXX CUDA) + else() + project(mxnet C CXX) + set(FIRST_CUDA FALSE) + endif() +else() + project(mxnet C CXX) + add_definitions(-DMSHADOW_USE_CUDA=0) endif() -set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/Modules;${CMAKE_MODULE_PATH}") -include(cmake/Utils.cmake) mxnet_option(USE_OPENCV "Build with OpenCV support" ON) mxnet_option(USE_OPENMP "Build with Openmp support" ON) -mxnet_option(USE_CUDA "Build with CUDA support" ON) mxnet_option(USE_CUDNN "Build with cudnn support" ON) # one could set CUDNN_ROOT for search 
path mxnet_option(USE_LAPACK "Build with lapack support" ON IF NOT MSVC) mxnet_option(USE_MKL_IF_AVAILABLE "Use MKL if found" ON) mxnet_option(USE_MKLML_MKL "Use MKLML variant of MKL (if MKL found)" ON IF USE_MKL_IF_AVAILABLE AND UNIX AND (NOT APPLE)) mxnet_option(USE_MKL_EXPERIMENTAL "Use experimental MKL (if MKL enabled and found)" OFF) -mxnet_option(USE_JEMALLOC "Build with Jemalloc support" OFF) +mxnet_option(USE_OPERATOR_TUNING "Enable auto-tuning of operators" ON AND NOT MSVC) +mxnet_option(USE_GPERFTOOLS "Build with GPerfTools support (if found)" ON) +mxnet_option(USE_JEMALLOC "Build with Jemalloc support" ON) mxnet_option(USE_PROFILER "Build with Profiler support" OFF) mxnet_option(USE_DIST_KVSTORE "Build with DIST_KVSTORE support" OFF) -mxnet_option(USE_PLUGINS_WARPCTC "Use WARPCTC Plugins" OFF) +mxnet_option(USE_PLUGINS_WARPCTC "Use WARPCTC Plugins" OFF) mxnet_option(USE_PLUGIN_CAFFE "Use Caffe Plugin" OFF) mxnet_option(USE_CPP_PACKAGE "Build C++ Package" OFF) mxnet_option(USE_MXNET_LIB_NAMING "Use MXNet library naming conventions." ON) mxnet_option(USE_GPROF "Compile with gprof (profiling) flag" OFF) mxnet_option(USE_VTUNE "Enable use of Intel Amplifier XE (VTune)" OFF) # one could set VTUNE_ROOT for search path +mxnet_option(INSTALL_EXAMPLES "Install the example source files." 
OFF) + + + +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/build/private/local_config.cmake) + include(${CMAKE_CURRENT_SOURCE_DIR}/build/private/local_config.cmake) +endif() + +set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/Modules;${CMAKE_MODULE_PATH}") + + + SET(EXTRA_OPERATORS "" CACHE PATH "EXTRA OPERATORS PATH") @@ -34,6 +66,7 @@ if("$ENV{VERBOSE}" STREQUAL "1") set(CMAKE_VERBOISE_MAKEFILE ON) endif() + if(MSVC) add_definitions(-DWIN32_LEAN_AND_MEAN) add_definitions(-DDMLC_USE_CXX11) @@ -87,6 +120,9 @@ if(USE_VTUNE) if(NOT VTUNE_ROOT) set(VTUNE_ROOT /opt/intel/vtune_amplifier_xe_2017) endif() + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-omit-frame-pointer -g -pg") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-omit-frame-pointer -g -pg") + set(CMAKE_LINK_LIBRARY_FILE_FLAG "${CMAKE_LINK_LIBRARY_FILE_FLAG} -g -pg") add_definitions(-DMXNET_USE_VTUNE=1) include_directories(${VTUNE_ROOT}/include) list(APPEND mxnet_LINKER_LIBS ${VTUNE_ROOT}/lib64/libittnotify.a) @@ -108,6 +144,8 @@ if(USE_MKL_IF_AVAILABLE) if(NOT MSVC) list(APPEND mxnet_LINKER_LIBS dl) endif() + # If using MKL, use the Intel OMP libraries + list(APPEND mxnet_LINKER_LIBS iomp5) if(USE_MKL_EXPERIMENTAL) add_definitions(-DMKL_EXPERIMENTAL=1) else() @@ -122,14 +160,20 @@ endif() include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src) -if(EXISTS ${PROJECT_SOURCE_DIR}/mshadow/cmake) - include(mshadow/cmake/mshadow.cmake) +if(FIRST_CUDA) + include(cmake/ChooseBlas.cmake) include(mshadow/cmake/Utils.cmake) - include(mshadow/cmake/Cuda.cmake) + include(cmake/FirstClassLangCuda.cmake) else() - include(mshadowUtils) - include(Cuda) - include(mshadow) + if(EXISTS ${PROJECT_SOURCE_DIR}/mshadow/cmake) + include(mshadow/cmake/mshadow.cmake) + include(mshadow/cmake/Utils.cmake) + include(mshadow/cmake/Cuda.cmake) + else() + include(mshadowUtils) + include(Cuda) + include(mshadow) + endif() endif() list(APPEND mxnet_LINKER_LIBS ${mshadow_LINKER_LIBS}) @@ -150,7 +194,7 
@@ include_directories("dlpack/include") # add_subdirectory(dlpack) #endif() -if(NOT MSVC) +if(NOT MSVC AND NOT APPLE) set(BEGIN_WHOLE_ARCHIVE -Wl,--whole-archive) set(END_WHOLE_ARCHIVE -Wl,--no-whole-archive) endif() @@ -162,16 +206,38 @@ if(UNIX) endif() endif() +set(ALT_MALLOC_FLAGS "-fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free") + +# ---[ gperftools +if(USE_GPERFTOOLS) + find_package(Gperftools) + if(GPERFTOOLS_FOUND) + message(STATUS "Using Gperftools malloc (tcmalloc)") + include_directories(${GPERFTOOLS_INCLUDE_DIR}) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ALT_MALLOC_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ALT_MALLOC_FLAGS}") + set(mxnet_LINKER_LIBS ${mxnet_LINKER_LIBS} ${GPERFTOOLS_LIBRARIES}) + set(USE_JEMALLOC 0) + endif() +endif() + # ---[ jemalloc if(USE_JEMALLOC) + if(USE_GPERFTOOLS) + message(ERROR "Only one of USE_JEMALLOC and USE_GPERFTOOLS can be defined at once") + endif() find_package(JeMalloc) if(JEMALLOC_FOUND) + message(STATUS "Using JEMalloc malloc") add_definitions(-DUSE_JEMALLOC) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ALT_MALLOC_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ALT_MALLOC_FLAGS}") include_directories(${JEMALLOC_INCLUDE_DIRS}) set(mxnet_LINKER_LIBS ${mxnet_LINKER_LIBS} ${JEMALLOC_LIBRARIES}) endif() endif() +# ---[ OpenCV if(USE_OPENCV) find_package(OpenCV QUIET COMPONENTS core highgui imgproc imgcodecs) if(NOT OpenCV_FOUND) # if not OpenCV 3.x, then imgcodecs are not found @@ -194,36 +260,41 @@ else(USE_OPENCV) add_definitions(-DMXNET_USE_OPENCV=0) endif() +# ---[ OpenMP if(USE_OPENMP) find_package(OpenMP REQUIRED) - if(OPENMP_FOUND) + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/openmp/CMakeLists.txt) + # Intel/llvm OpenMP: https://github.com/llvm-mirror/openmp + set(OPENMP_STANDALONE_BUILD TRUE) + set(LIBOMP_ENABLE_SHARED FALSE) + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/openmp) + list(REMOVE_ITEM mxnet_LINKER_LIBS iomp5) + list(APPEND mxnet_LINKER_LIBS omp) 
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") - set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") + else() + if(OPENMP_FOUND) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") + endif() endif() elseif(UNIX) list(APPEND mxnet_LINKER_LIBS pthread) endif() +# ---[ LAPack if(USE_LAPACK) add_definitions(-DMXNET_USE_LAPACK=1) list(APPEND mxnet_LINKER_LIBS lapack) else(USE_LAPACK) # Workaround for Windows until using new Jenkinsfile. - if(USE_BLAS STREQUAL "open") + if(BLAS STREQUAL "Open" OR BLAS STREQUAL "open") add_definitions(-DMXNET_USE_LAPACK=1) endif() endif() - -if(UNIX) - find_library(RTLIB rt) - if(RTLIB) - list(APPEND mxnet_LINKER_LIBS ${RTLIB}) - endif() -endif() - # ---[ jemalloc if(USE_JEMALLOC) find_package(JeMalloc) @@ -254,11 +325,18 @@ endif() if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/mshadow/cmake) add_subdirectory("mshadow") endif() -FILE(GLOB_RECURSE SOURCE "src/*.cc" "src/*.h") +FILE(GLOB_RECURSE SOURCE "src/*.cc" "src/*.h" "include/*.h") FILE(GLOB_RECURSE CUDA "src/*.cu" "src/*.cuh") # add nnvm to source -FILE(GLOB_RECURSE NNVMSOURCE "nnvm/src/*.cc" "nnvm/src/*.h" "nnvm/include/*.h") +FILE(GLOB_RECURSE NNVMSOURCE + nnvm/src/c_api/*.cc + nnvm/src/core/*.cc + nnvm/src/pass/*.cc + nnvm/src/c_api/*.h + nnvm/src/core/*.h + nnvm/src/pass/*.h + nnvm/include/*.h) list(APPEND SOURCE ${NNVMSOURCE}) # add mshadow file @@ -289,6 +367,10 @@ if(USE_PLUGINS_WARPCTC) list(APPEND CUDA ${PLUGINS_CUSRC}) endif() +if(USE_OPERATOR_TUNING) + add_definitions(-DMXNET_USE_OPERATOR_TUNING=1) +endif() + if(USE_PLUGIN_CAFFE) if(NOT USE_CUDA) 
set(CPU_ONLY ON) @@ -343,37 +425,46 @@ if(MSVC) endif() if(USE_CUDA) - list(APPEND CUDA_INCLUDE_DIRS ${INCLUDE_DIRECTORIES}) - # define preprocessor macro so that we will not include the generated forcelink header - mshadow_cuda_compile(cuda_objs ${CUDA}) - if(MSVC) - FIND_LIBRARY(CUDA_nvrtc_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32") - list(APPEND mxnet_LINKER_LIBS ${CUDA_nvrtc_LIBRARY}) - set(CUDA_cuda_LIBRARY "${CUDA_nvrtc_LIBRARY}/../cuda.lib") - list(APPEND mxnet_LINKER_LIBS ${CUDA_cuda_LIBRARY}) - FIND_LIBRARY(CUDA_cufft_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32") - list(APPEND mxnet_LINKER_LIBS "${CUDA_cufft_LIBRARY}/../cufft.lib") # For fft operator - FIND_LIBRARY(CUDA_cusolver_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32") - list(APPEND mxnet_LINKER_LIBS "${CUDA_cusolver_LIBRARY}/../cusolver.lib") # For cusolver - else(MSVC) - list(APPEND mxnet_LINKER_LIBS nvrtc cuda cufft cusolver) - link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64") - endif() - list(APPEND SOURCE ${cuda_objs} ${CUDA}) - add_definitions(-DMXNET_USE_CUDA=1) - add_definitions(-DMXNET_USE_NVRTC=1) - if(CUDA_LIBRARY_PATH) - if(IS_CONTAINER_BUILD) - # In case of building on a production-like build container which may not have Cuda installed - if(NOT CMAKE_SYSTEM_HAS_CUDA) - # Assuming building in a container that doesn't have CUDA installed (ie CPU-only build machine) - # so use the stub cuda driver shared library - if(EXISTS ${CUDA_LIBRARY_PATH}/stubs/libcuda.so) - link_directories(${CUDA_LIBRARY_PATH}/stubs) + if(FIRST_CUDA) + mshadow_select_nvcc_arch_flags(NVCC_FLAGS_ARCH) + string(REPLACE ";" " " NVCC_FLAGS_ARCH "${NVCC_FLAGS_ARCH}") + set(CMAKE_CUDA_FLAGS "${NVCC_FLAGS_ARCH}") + set(CMAKE_CUDA_FLAGS_RELEASE "${NVCC_FLAGS_ARCH} -use_fast_math") + list(APPEND mxnet_LINKER_LIBS nvrtc cuda cublas cufft cusolver curand) + list(APPEND SOURCE ${CUDA}) + 
add_definitions(-DMXNET_USE_CUDA=1) + else() + list(APPEND CUDA_INCLUDE_DIRS ${INCLUDE_DIRECTORIES}) + # define preprocessor macro so that we will not include the generated forcelink header + mshadow_cuda_compile(cuda_objs ${CUDA}) + if(MSVC) + FIND_LIBRARY(CUDA_nvrtc_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32") + list(APPEND mxnet_LINKER_LIBS ${CUDA_nvrtc_LIBRARY}) + set(CUDA_cuda_LIBRARY "${CUDA_nvrtc_LIBRARY}/../cuda.lib") + list(APPEND mxnet_LINKER_LIBS ${CUDA_cuda_LIBRARY}) + FIND_LIBRARY(CUDA_cufft_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32") + list(APPEND mxnet_LINKER_LIBS "${CUDA_cufft_LIBRARY}/../cufft.lib") # For fft operator + FIND_LIBRARY(CUDA_cusolver_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32") + list(APPEND mxnet_LINKER_LIBS "${CUDA_cusolver_LIBRARY}/../cusolver.lib") # For cusolver + else(MSVC) + list(APPEND mxnet_LINKER_LIBS nvrtc cuda cufft cusolver) + link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64") + endif() + list(APPEND SOURCE ${cuda_objs} ${CUDA}) + add_definitions(-DMXNET_USE_CUDA=1) + if(CUDA_LIBRARY_PATH) + if(IS_CONTAINER_BUILD) + # In case of building on a production-like build container which may not have Cuda installed + if(NOT CMAKE_SYSTEM_HAS_CUDA) + # Assuming building in a container that doesn't have CUDA installed (ie CPU-only build machine) + # so use the stub cuda driver shared library + if(EXISTS ${CUDA_LIBRARY_PATH}/stubs/libcuda.so) + link_directories(${CUDA_LIBRARY_PATH}/stubs) + endif() + endif() endif() - endif() endif() - endif() + endif() endif() # unsupported: if caffe is a subdirectory of mxnet, load its CMakeLists.txt as well @@ -398,53 +489,63 @@ else() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc") endif() +set(MXNET_INSTALL_TARGETS mxnet) if(${CMAKE_SYSTEM_NAME} STREQUAL "Darwin" AND USE_MXNET_LIB_NAMING) add_library(mxnet MODULE ${SOURCE}) + add_library(mxnet_static STATIC ${SOURCE}) 
else() if(UNIX) + list(APPEND MXNET_INSTALL_TARGETS mxnet_static) add_library(mxnet_static STATIC ${SOURCE}) # Need an arbitrary source file to trigger CMake to build the library add_library(mxnet SHARED) - # This has prolems, as it adds libmxnet_static to INTERFACE_LINK_LIBRARIES - target_link_libraries(mxnet "-Wl,--whole-archive $ -Wl,--no-whole-archive") - target_link_libraries(mxnet mxnet_static) # Let cmake understand the dependency - add_custom_target( - StaticallyLinkStaticMXNetLibrary ALL - BYPRODUCTS ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}/libmxnet.a - WORKING_DIRECTORY ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY} - COMMAND ln -sf libmxnet_static.a libmxnet.a - DEPENDS mxnet_static - ) + set_target_properties(mxnet_static PROPERTIES OUTPUT_NAME mxnet) + target_link_libraries(mxnet PRIVATE "-Wl,--whole-archive $ -Wl,--no-whole-archive") + target_link_libraries(mxnet PRIVATE mxnet_static) # Let cmake understand the dependency else() add_library(mxnet SHARED ${SOURCE}) endif() endif() -target_link_libraries(mxnet ${mxnet_LINKER_LIBS}) - -if(USE_PLUGINS_WARPCTC) - target_link_libraries(mxnet debug ${WARPCTC_LIB_DEBUG}) - target_link_libraries(mxnet optimized ${WARPCTC_LIB_RELEASE}) -endif() - -target_link_libraries(mxnet dmlc) -if(MSVC AND USE_MXNET_LIB_NAMING) - set_target_properties(mxnet PROPERTIES OUTPUT_NAME "libmxnet") +if(USE_CUDA) + if(FIRST_CUDA) + target_compile_options(mxnet PUBLIC "$<$:-Xcompiler=-MTd>") + target_compile_options(mxnet PUBLIC "$<$:-Xcompiler=-MT>") + endif() endif() - - if(USE_DIST_KVSTORE) if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/ps-lite/CMakeLists.txt) add_subdirectory("ps-lite") - list(APPEND pslite_LINKER_LIBS pslite) - target_link_libraries(mxnet debug ${pslite_LINKER_LIBS_DEBUG}) - target_link_libraries(mxnet optimized ${pslite_LINKER_LIBS_RELEASE}) + list(APPEND pslite_LINKER_LIBS pslite protobuf) + target_link_libraries(mxnet PUBLIC debug ${pslite_LINKER_LIBS_DEBUG}) + target_link_libraries(mxnet PUBLIC optimized ${pslite_LINKER_LIBS_RELEASE}) 
+ if(CMAKE_BUILD_TYPE STREQUAL "Debug") + list(APPEND mxnet_LINKER_LIBS ${pslite_LINKER_LIBS_DEBUG}) + else() + list(APPEND mxnet_LINKER_LIBS ${pslite_LINKER_LIBS_RELEASE}) + endif() + target_link_libraries(mxnet PUBLIC debug ${pslite_LINKER_LIBS_DEBUG}) + target_link_libraries(mxnet PUBLIC optimized ${pslite_LINKER_LIBS_RELEASE}) + else() - set(pslite_LINKER_LIBS protobuf zmq-static ) + set(pslite_LINKER_LIBS protobuf zmq-static) endif() add_definitions(-DMXNET_USE_DIST_KVSTORE) - target_link_libraries(mxnet ${pslite_LINKER_LIBS}) include_directories(SYSTEM ${pslite_INCLUDE_DIR}) + list(APPEND mxnet_LINKER_LIBS ${pslite_LINKER_LIBS}) +endif() + +target_link_libraries(mxnet PUBLIC ${mxnet_LINKER_LIBS}) + +if(USE_PLUGINS_WARPCTC) + target_link_libraries(mxnet PUBLIC debug ${WARPCTC_LIB_DEBUG}) + target_link_libraries(mxnet PUBLIC optimized ${WARPCTC_LIB_RELEASE}) +endif() + +target_link_libraries(mxnet PUBLIC dmlc) + +if(MSVC AND USE_MXNET_LIB_NAMING) + set_target_properties(mxnet PROPERTIES OUTPUT_NAME "libmxnet") endif() if(USE_PROFILER) @@ -453,6 +554,18 @@ endif() add_subdirectory(tests) +include(GNUInstallDirs) +install(TARGETS ${MXNET_INSTALL_TARGETS} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} +) + +install(DIRECTORY include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) +if (INSTALL_EXAMPLES) + install(DIRECTORY example DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}) +endif() + # AUTO_INSTALL_DIR -> Optional: specify post-build install direcory if(AUTO_INSTALL_DIR) # ---[ Install Includes @@ -493,6 +606,7 @@ if(MSVC) find_package(PythonInterp) set(PYTHON_EXECUTABLE ${PYTHON_EXECUTABLE} CACHE FILEPATH "Path to the python executable") endif() -set(LINT_DIRS include src scripts python tests cpp-package) -add_custom_target(mxnet_lint COMMAND ${CMAKE_COMMAND} -DMSVC=${MSVC} -DPYTHON_EXECUTABLE=${PYTHON_EXECUTABLE} -DLINT_DIRS=${LINT_DIRS} 
-DPROJECT_SOURCE_DIR=${CMAKE_CURRENT_SOURCE_DIR} -DPROJECT_NAME=mxnet -P ${CMAKE_CURRENT_SOURCE_DIR}/dmlc-core/cmake/lint.cmake) +set(LINT_DIRS "include src plugin cpp-package tests") +set(EXCLUDE_PATH "src/operator/contrib/ctc_include") +add_custom_target(mxnet_lint COMMAND ${CMAKE_COMMAND} -DMSVC=${MSVC} -DPYTHON_EXECUTABLE=${PYTHON_EXECUTABLE} -DLINT_DIRS=${LINT_DIRS} -DPROJECT_SOURCE_DIR=${CMAKE_CURRENT_SOURCE_DIR} -DPROJECT_NAME=mxnet -DEXCLUDE_PATH=${EXCLUDE_PATH} -P ${CMAKE_CURRENT_SOURCE_DIR}/dmlc-core/cmake/lint.cmake) diff --git a/CODEOWNERS b/CODEOWNERS new file mode 100644 index 000000000000..57b4ec3cb3fb --- /dev/null +++ b/CODEOWNERS @@ -0,0 +1,14 @@ +# Owners of Apache MXNet + +# Global owners +* @apache/mxnet-committers + +# Owners of language bindings +R-package/* @thirdwing +scala-package/* @javelinjs +perl-package/* @sergeykolychev + +# CMake owners +CMakeLists.txt @cjolivier01 +cmake/* @cjolivier01 + diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 8cae93854e19..64cd29dc0bbe 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -1,5 +1,5 @@ -Contributors of DMLC/MXNet -========================== +Contributors of Apache MXNet (incubating) +========================================= MXNet has been developed by a community of people who are interested in large-scale machine learning and deep learning. Everyone is more than welcomed to is a great way to make the project better and more accessible to more users. @@ -7,6 +7,7 @@ Committers ---------- Committers are people who have made substantial contribution to the project and being active. The committers are the granted write access to the project. +A full list of committers can be found here: http://incubator.apache.org/projects/mxnet.html * [Bing Xu](https://github.com/antinucleon) - Bing is the initiator and major contributor of operators and ndarray modules of mxnet. @@ -39,6 +40,7 @@ The committers are the granted write access to the project. 
- Zixuan is one of major maintainers of mxnet scala package. * [Yuan Tang](https://github.com/terrytangyuan) - Yuan is one of major maintainers of mxnet scala package. +* [Chris Olivier](https://github.com/cjolivier01) ### Become a Committer MXNet is a opensource project and we are actively looking for new committers @@ -50,7 +52,7 @@ New committers will be proposed by current committers, with support from more th List of Contributors -------------------- -* [Full List of Contributors](https://github.com/dmlc/mxnet/graphs/contributors) +* [Full List of Contributors](https://github.com/apache/incubator-mxnet/graphs/contributors) - To contributors: please add your name to the list when you submit a patch to the project:) * [Feng Wang](https://github.com/happynear) - Feng makes mxnet compatible with Windows Visual Studio. @@ -146,3 +148,4 @@ List of Contributors * [Xizhou Zhu](https://github.com/einsiedler0408/) * [Jean Kossaifi](https://github.com/JeanKossaifi/) * [Kenta Kubo](https://github.com/kkk669/) +* [Manu Seth](https://github.com/mseth10/) diff --git a/Jenkinsfile b/Jenkinsfile index 95115cf58920..cbe63758ac70 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -7,37 +7,41 @@ mx_lib = 'lib/libmxnet.so, lib/libmxnet.a, dmlc-core/libdmlc.a, nnvm/lib/libnnvm // command to start a docker container docker_run = 'tests/ci_build/ci_build.sh' // timeout in minutes -max_time = 60 +max_time = 1440 // assign any caught errors here err = null -// set build status to success by default -currentBuild.result = "SUCCESS" // initialize source codes def init_git() { + deleteDir() retry(5) { try { timeout(time: 2, unit: 'MINUTES') { checkout scm sh 'git submodule update --init' + sh 'git clean -d -f' } } catch (exc) { deleteDir() - error "Failed to fetch source codes" + error "Failed to fetch source codes with ${exc}" + sleep 2 } } } def init_git_win() { + deleteDir() retry(5) { try { timeout(time: 2, unit: 'MINUTES') { checkout scm bat 'git submodule update --init' + bat 'git clean 
-d -f' } } catch (exc) { deleteDir() - error "Failed to fetch source codes" + error "Failed to fetch source codes with ${exc}" + sleep 2 } } } @@ -50,7 +54,7 @@ def make(docker_type, make_flag) { try { sh "${docker_run} ${docker_type} make ${make_flag}" } catch (exc) { - echo 'Incremental compilation failed. Fall back to build from scratch' + echo 'Incremental compilation failed with ${exc}. Fall back to build from scratch' sh "${docker_run} ${docker_type} sudo make clean" sh "${docker_run} ${docker_type} sudo make -C amalgamation/ clean" sh "${docker_run} ${docker_type} make ${make_flag}" @@ -78,11 +82,18 @@ echo ${libs} | sed -e 's/,/ /g' | xargs md5sum } // Python unittest for CPU -def python_ut(docker_type) { +// Python 2 +def python2_ut(docker_type) { timeout(time: max_time, unit: 'MINUTES') { sh "${docker_run} ${docker_type} find . -name '*.pyc' -type f -delete" - sh "${docker_run} ${docker_type} PYTHONPATH=./python/ nosetests --with-timer --verbose tests/python/unittest" - sh "${docker_run} ${docker_type} PYTHONPATH=./python/ nosetests --with-timer --verbose tests/python/train" + sh "${docker_run} ${docker_type} PYTHONPATH=./python/ nosetests-2.7 --with-timer --verbose tests/python/unittest" + sh "${docker_run} ${docker_type} PYTHONPATH=./python/ nosetests-2.7 --with-timer --verbose tests/python/train" + } +} + +// Python 3 +def python3_ut(docker_type) { + timeout(time: max_time, unit: 'MINUTES') { sh "${docker_run} ${docker_type} find . -name '*.pyc' -type f -delete" sh "${docker_run} ${docker_type} PYTHONPATH=./python/ nosetests-3.4 --with-timer --verbose tests/python/unittest" } @@ -90,10 +101,17 @@ def python_ut(docker_type) { // GPU test has two parts. 1) run unittest on GPU, 2) compare the results on // both CPU and GPU -def python_gpu_ut(docker_type) { +// Python 2 +def python2_gpu_ut(docker_type) { timeout(time: max_time, unit: 'MINUTES') { sh "${docker_run} ${docker_type} find . 
-name '*.pyc' -type f -delete" - sh "${docker_run} ${docker_type} PYTHONPATH=./python/ nosetests --with-timer --verbose tests/python/gpu" + sh "${docker_run} ${docker_type} PYTHONPATH=./python/ nosetests-2.7 --with-timer --verbose tests/python/gpu" + } +} + +// Python 3 +def python3_gpu_ut(docker_type) { + timeout(time: max_time, unit: 'MINUTES') { sh "${docker_run} ${docker_type} find . -name '*.pyc' -type f -delete" sh "${docker_run} ${docker_type} PYTHONPATH=./python/ nosetests-3.4 --with-timer --verbose tests/python/gpu" } @@ -150,11 +168,21 @@ try { } } }, + 'Amalgamation MIN': { + node('mxnetlinux') { + ws('workspace/amalgamationmin') { + init_git() + make('cpu', '-C amalgamation/ clean') + make('cpu', '-C amalgamation/ USE_BLAS=openblas MIN=1') + } + } + }, 'Amalgamation': { node('mxnetlinux') { ws('workspace/amalgamation') { init_git() - make('cpu', '-C amalgamation/ USE_BLAS=openblas MIN=1') + make('cpu', '-C amalgamation/ clean') + make('cpu', '-C amalgamation/ USE_BLAS=openblas') } } }, @@ -185,6 +213,7 @@ try { withEnv(['OpenBLAS_HOME=C:\\mxnet\\openblas', 'OpenCV_DIR=C:\\mxnet\\opencv_vc14', 'CUDA_PATH=C:\\CUDA\\v8.0']) { init_git_win() bat """mkdir build_vc14_cpu + call "C:\\Program Files (x86)\\Microsoft Visual Studio 14.0\\VC\\bin\\x86_amd64\\vcvarsx86_amd64.bat" cd build_vc14_cpu cmake -G \"Visual Studio 14 2015 Win64\" -DUSE_CUDA=0 -DUSE_CUDNN=0 -DUSE_NVRTC=0 -DUSE_OPENCV=1 -DUSE_OPENMP=1 -DUSE_PROFILER=1 -DUSE_BLAS=open -DUSE_LAPACK=1 -DUSE_DIST_KVSTORE=0 ${env.WORKSPACE}""" bat 'C:\\mxnet\\build_vc14_cpu.bat' @@ -242,31 +271,75 @@ try { } stage('Unit Test') { - parallel 'Python2/3: CPU': { + parallel 'Python2: CPU': { node('mxnetlinux') { - ws('workspace/ut-python-cpu') { + ws('workspace/ut-python2-cpu') { init_git() unpack_lib('cpu') - python_ut('cpu') + python2_ut('cpu') } } }, - 'Python2/3: GPU': { + 'Python3: CPU': { node('mxnetlinux') { - ws('workspace/ut-python-gpu') { + ws('workspace/ut-python3-cpu') { + init_git() + unpack_lib('cpu') + 
python3_ut('cpu') + } + } + }, + 'Python2: GPU': { + node('mxnetlinux') { + ws('workspace/ut-python2-gpu') { + init_git() + unpack_lib('gpu', mx_lib) + python2_gpu_ut('gpu') + } + } + }, + 'Python3: GPU': { + node('mxnetlinux') { + ws('workspace/ut-python3-gpu') { init_git() unpack_lib('gpu', mx_lib) - python_gpu_ut('gpu') + python3_gpu_ut('gpu') } } }, - 'Python2/3: MKLML': { + 'Python2: MKLML-CPU': { node('mxnetlinux') { - ws('workspace/ut-python-mklml') { + ws('workspace/ut-python2-mklml-cpu') { init_git() unpack_lib('mklml') - python_ut('mklml_gpu') - python_gpu_ut('mklml_gpu') + python2_ut('mklml_gpu') + } + } + }, + 'Python2: MKLML-GPU': { + node('mxnetlinux') { + ws('workspace/ut-python2-mklml-gpu') { + init_git() + unpack_lib('mklml') + python2_gpu_ut('mklml_gpu') + } + } + }, + 'Python3: MKLML-CPU': { + node('mxnetlinux') { + ws('workspace/ut-python3-mklml-cpu') { + init_git() + unpack_lib('mklml') + python3_ut('mklml_gpu') + } + } + }, + 'Python3: MKLML-GPU': { + node('mxnetlinux') { + ws('workspace/ut-python3-mklml-gpu') { + init_git() + unpack_lib('mklml') + python3_gpu_ut('mklml_gpu') } } }, @@ -313,7 +386,7 @@ try { sh "${docker_run} cpu rm -rf .Renviron" sh "${docker_run} cpu mkdir -p /workspace/ut-r-cpu/site-library" sh "${docker_run} cpu make rpkg USE_BLAS=openblas R_LIBS=/workspace/ut-r-cpu/site-library" - sh "${docker_run} cpu R CMD INSTALL --library=/workspace/ut-r-cpu/site-library mxnet_current_r.tar.gz" + sh "${docker_run} cpu R CMD INSTALL --library=/workspace/ut-r-cpu/site-library R-package" sh "${docker_run} cpu make rpkgtest R_LIBS=/workspace/ut-r-cpu/site-library" } } @@ -328,13 +401,13 @@ try { sh "${docker_run} gpu rm -rf .Renviron" sh "${docker_run} gpu mkdir -p /workspace/ut-r-gpu/site-library" sh "${docker_run} gpu make rpkg USE_BLAS=openblas R_LIBS=/workspace/ut-r-gpu/site-library" - sh "${docker_run} gpu R CMD INSTALL --library=/workspace/ut-r-gpu/site-library mxnet_current_r.tar.gz" + sh "${docker_run} gpu R CMD INSTALL 
--library=/workspace/ut-r-gpu/site-library R-package" sh "${docker_run} gpu make rpkgtest R_LIBS=/workspace/ut-r-gpu/site-library R_GPU_ENABLE=1" } } } }, - 'Python2/3: CPU Win':{ + 'Python 2: CPU Win':{ node('mxnetwindows') { ws('workspace/ut-python-cpu') { init_git_win() @@ -343,20 +416,30 @@ try { 7z x -y vc14_cpu.7z''' bat """xcopy C:\\mxnet\\data data /E /I /Y xcopy C:\\mxnet\\model model /E /I /Y - call activate py3 + call activate py2 set PYTHONPATH=${env.WORKSPACE}\\pkg_vc14_cpu\\python del /S /Q ${env.WORKSPACE}\\pkg_vc14_cpu\\python\\*.pyc C:\\mxnet\\test_cpu.bat""" - bat """xcopy C:\\mxnet\\data data /E /I /Y + } + } + }, + 'Python 3: CPU Win': { + node('mxnetwindows') { + ws('workspace/ut-python-cpu') { + init_git_win() + unstash 'vc14_cpu' + bat '''rmdir /s/q pkg_vc14_cpu + 7z x -y vc14_cpu.7z''' + bat """xcopy C:\\mxnet\\data data /E /I /Y xcopy C:\\mxnet\\model model /E /I /Y - call activate py2 + call activate py3 set PYTHONPATH=${env.WORKSPACE}\\pkg_vc14_cpu\\python del /S /Q ${env.WORKSPACE}\\pkg_vc14_cpu\\python\\*.pyc C:\\mxnet\\test_cpu.bat""" } } }, - 'Python2/3: GPU Win':{ + 'Python 2: GPU Win':{ node('mxnetwindows') { ws('workspace/ut-python-gpu') { init_git_win() @@ -365,19 +448,29 @@ try { 7z x -y vc14_gpu.7z''' bat """xcopy C:\\mxnet\\data data /E /I /Y xcopy C:\\mxnet\\model model /E /I /Y - call activate py3 + call activate py2 set PYTHONPATH=${env.WORKSPACE}\\pkg_vc14_gpu\\python del /S /Q ${env.WORKSPACE}\\pkg_vc14_gpu\\python\\*.pyc C:\\mxnet\\test_gpu.bat""" + } + } + }, + 'Python 3: GPU Win':{ + node('mxnetwindows') { + ws('workspace/ut-python-gpu') { + init_git_win() + unstash 'vc14_gpu' + bat '''rmdir /s/q pkg_vc14_gpu + 7z x -y vc14_gpu.7z''' bat """xcopy C:\\mxnet\\data data /E /I /Y xcopy C:\\mxnet\\model model /E /I /Y - call activate py2 + call activate py3 set PYTHONPATH=${env.WORKSPACE}\\pkg_vc14_gpu\\python del /S /Q ${env.WORKSPACE}\\pkg_vc14_gpu\\python\\*.pyc C:\\mxnet\\test_gpu.bat""" } } - } + } } stage('Integration 
Test') { @@ -428,9 +521,11 @@ try { } } } + // set build status to success at the end + currentBuild.result = "SUCCESS" } catch (caughtError) { node("mxnetlinux") { - sh "echo caught error" + sh "echo caught ${caughtError}" err = caughtError currentBuild.result = "FAILURE" } diff --git a/KEYS b/KEYS index 070f38d4f78e..d646bb7c3f11 100644 --- a/KEYS +++ b/KEYS @@ -189,3 +189,177 @@ TNxwR0b9K/mLKGh58n1vVT79QReQFQ4wWFyQkmFkL9ybG04wTKe00VDNP987nSBg FuSamX64+S6T8IwAuP9U =KRiV -----END PGP PUBLIC KEY BLOCK----- +pub 4096R/C65AF308 2017-08-15 [expires: 2021-08-15] +uid [ultimate] Sandeep Krishnamurthy +sig 3 C65AF308 2017-08-15 Sandeep Krishnamurthy +sub 4096R/3D0D60F6 2017-08-15 [expires: 2021-08-15] +sig C65AF308 2017-08-15 Sandeep Krishnamurthy + +-----BEGIN PGP PUBLIC KEY BLOCK----- + +mQINBFmTNosBEACuTlborR4n0MyVHTndYNVAjT3MNkJDitsSAeBpUl2wXUZNZ3YJ +iDfLsDCdzSTc/uYsfeVfRay1nYZQNBO2ikRVg5CO+Q4T7wceZ7uFwEVNnKNRiFlI +GjJMnjWa4g4GqmRLrEDJXxibFLWuCZgu8b2z/EQfCp+lBY4Q01/ag6ydoejDicRF +sHrdmt8bJolvAjNepdsW2nxOAo/j00yDyR/xdCCIXATopHdaS3isGlF+gsXr3PTT +oLUqqwst+Sx8Zc/0cCd+QXtzrb5jImtKHTj9nQJpznHxWeGhQsdd6Hvt76lrJBRm +MKM4Ti+jzy2yCs+VLOpqiY3AUuleNELQ6LgGgZGDY4doLtliSjpEiddHVkXNV+et +gq114Ucr86pPPS7I92yx3FES9uforljzZB0MZRDv3feaBZKy8+HR040I2/PSe3eD +PU7qb4Sj6vUhxztLGAKjKWFt+DbFuUZqS8mGCk1fFId49+U/XQvjbmy7GfTRXZ05 +XWwf6SAerh8gDBxFm60ALz+7LiJYy6D8HBAE+HTLf4/FK3o1cuW9niHWO/7RdHiC +XW9N98dwPm49nn+bXan45lT85zTJhUOWY5/PQMitj68D4Z6EHDnFaSBTcvvjCor9 +sJSJKh8p36df489xD3fe7D7ckzu1J7STGkvarQ+wkWTrdCK6dzFmo0cnzwARAQAB +tCZTYW5kZWVwIEtyaXNobmFtdXJ0aHkgPHNrbUBhcGFjaGUub3JnPokCPQQTAQoA +JwUCWZM2iwIbAwUJB4YfgAULCQgHAwUVCgkICwUWAgMBAAIeAQIXgAAKCRBRjac6 +xlrzCOMlD/0dq36zb/lmyBbpd3Xznag4YZyLbO7oR7IwKK6/X8VMOYX3Jd8bXncR +w09G/03or2j5MZIfhYH0yOEUHhXae8Ewi2/kGwMGH8nEc9njSv4cZKx76iQ8OBcL +/V3DZZEc1h7bDaqheOiWpfSEnSg3SO9LyHDh1s+7UomIccFQgBCUg/BpIB2A1i1/ +aAAPeqBWqEqOIC+QHlXL8RXotC4K/h+qc8XVa5GQKuAuJZNb6P4twiDXzJaeFYuE +x6ZeJ7aTJW8cnG/5ShXyrjd/awdpnuUZNoUBVF/ZIxBNGJSD6005BFIysIVFIyIz 
++Qf+SLyMdRsAZQFm5xg9HkBRz7/YUY5q1FZz8VBl6XzZLmqpU9siB4O6jzbDQ2BW +fng8FR1kQvaHqIDA3zCDslas1imlcHqJjmKVEmoVB77ug3aTuq3o817ExrUsjciZ +CUOedZ3TnmF3I/YLryqKKUzGSzrNuEFa3lRzL7pULKsRzoWBdbhW3XskKWux1SlP +OWEr/lm8Yg+P2wDikY7jYQeJU/iZKc3cEUbo/edLA5+12mbL3cUWP/If2+MXc04f +nbhKZtb0jcB91KSUgMi+TP/IM9l5w3q+qmG6BDyBwwE0rf/VVvOqh9Ayt+iyLwfz +VLkEVsTpqsjBxCEd/GAUoIBK6cCchDmCbKMgQqWySxaA+gb6oul/abkCDQRZkzaL +ARAAweT6N1419HCIODjuUaQNFBDuu+IjROYllJCbqaHxXoj1uvOQD2wMxZo43IIk +NqxbAgV1z90phiOLBC44z2MNvUj2yx41iZ1zkRkBRTx+Q3oUm9Vcon9a7blfh/mW +/YhPbs2E1ukK/tL317rerdrNet3EhiqglHU3a6I4D/oPnQ/8t9UcYiGVULrhUVTc +GoNdkz+Fu3Fmai/stnUNnYoMQIeBBukoF++SKfxm/y57LeScMbqJX2lPEVRm4ehx +SNlO38/p42aVsNzTuWlb0otGh+WaHZvmfLpckjZVCk+rGqoFUkx+BWfoyLv8VOPJ +qY4hQ0SjAeEcYwK2/Z99d2/0BN0zemldjvtT7Unrnm7hni3Ore9QaHCqhvyfh9P2 +hcdg9FS8VOEtxjdW/IAWEcT62drZz4WAarv1a8gxogwXtjt5acWMgkwuAXwhQM/D +g4KkKMtT182MmF/cVn4RfGkRihnbHCXrp+sej19I3hUaB855PfIZ0v8N7fU2jM0h +cv84ha6w0bV0Ab4HFqo+6NT4c4yPh/PgotEXYgkIQauPkicgkuETfOyuYLtdcPGj +c3kP4HnJ6JXVq7VlI/kEO+LWEi8ygsUP5l3dfLPf1O6BOANQAF7tI/5g1pB75miX +Y1yx9liFtJUuhXs3IN/6d1I2ZA3Q17LWbb6uh9ecINiLSPsAEQEAAYkCJQQYAQoA +DwUCWZM2iwIbDAUJB4YfgAAKCRBRjac6xlrzCIRTD/9gEejzaO7f4VnmovrYkoBt +wF2b2z7F9RhdxgCAvBfuQ7mI6WzH2chYP7SwSjugB+XPFQ5fM0sa8UHKyBryGBfx +aQaxS/1lFyIM7g/zNCNxHQN9ZLmIK2i+VSKZYjbZDh6ESnT/h2Shk5IN/8ho11+e +nGehxVDDEpEBAtwETwLOh7v9+uXfn6uDchBZJIT/SwqONUWsOQrsB6JEPKoSCaM6 +PcRjmU+Vd8DLkxrvQe0PqlYiiiNZiRv1WJ2ywPXTU9dvsAdRgvPWdNGcPhjjQwuj +e95OzvNGGdcVPX+cudfiSm9/BTCEBpUkjg8rFSjh3YisJ18TuV9DzG1X/bvWLXaY +zACetOaMYkvMboZfwHdbCPpD/MGnqAjFWoRvdKlV0ZZfwUOzEEp6NHFfPw1+9px5 +BremMOrPkPZALGs+7mb9s8tvC2OKMkbXzqDsXnZUS5p3QgrensHBQt9FyaLlE0Zi +x+/cW/NpygBSjWyGX3ahdS30U4rl5BfkfyzvCkohWSoaVODf/2HjxEyVVONDShBf +kpbpFOW5jAfVjs2ZqlVsUkNCL9wSPKUU30szkZ1AT2BOivJoCJv8WgF7np1LAZCa +C0d/3i4v3h6AgzUqDjDILFCNiDgsaKtdoB5dK1tgjC3wX42lRAXDsTnF78c5iGk4 +6gcqWf5YDM+zv+yvsafmug== +=my1/ +-----END PGP PUBLIC KEY BLOCK----- +pub 4096R/703DF31B 2017-10-16 +uid Chris Olivier (CODE SIGNING KEY) +sig 3 
703DF31B 2017-10-16 Chris Olivier (CODE SIGNING KEY) +sub 4096R/7B90EEF2 2017-10-16 +sig 703DF31B 2017-10-16 Chris Olivier (CODE SIGNING KEY) + +-----BEGIN PGP PUBLIC KEY BLOCK----- +Version: GnuPG v1 + +mQINBFnk4CsBEADBZBL8aDcNMHfpxd/gCPYf+sw0EyWJZ+whUwwod8TmZohJr84a +Lb3RTgCfOo5XU7DYYZoQGJsJsTgPw4bAQEPbHjNr0NNKR1IphYo8mhKxhzrocNDO +QI9X/PyyZRNdW8hRHzBZDB+Wrb3dp+J1a6Zn+hK3LhQ+I2HHBM/oWx9j5+0NqKs+ +kqOIC7H3hY1+ZG/jhaG4L1/VMShsPrDD/yiGPUyF2gq345Q+5VUlMXRy1iHJjqC2 +syEUS4m5xL6csqz4JYZlPNKYKwBzzJ5O3SKaLGESLMZTmdjvXmTb4nBbj1ioQcwh +N3IRaBFp+0Bxxcv0Jp/c9GLJF5u1aKOmYmlpyeQLF8APrABtkoX1/hCQhqk9nlpb +Y5+gq+VolJF6W78QxEVdpBBguFS+qSnXY+6YxrMel+XPKwgwJw5GDCd4tcy0dkoM +PLyA5azyOiQOzxHbEi0SheD1kcSSW3sqTdCXzIxHc9Sdwb6sHiboPFJSwIl79R1j +k0WztKIccVrlgcpiYyzVHr+bRRPgcW3P83mp3YVzsUdFdxYiVI6drAkcOD8NPumB +ehP8Ih1GOejemN5RIzq1ZLhZOBH1Y8MM44R/Z8YthzobXbwMQJY8n9jvUi81/DC3 +NX2MWui4AWXdzIgQxMi3l9n+1LOQXhEUfJW0I2gw+QqKFUeSnc+mX/SmDwARAQAB +tDhDaHJpcyBPbGl2aWVyIChDT0RFIFNJR05JTkcgS0VZKSA8Y2pvbGl2aWVyMDFA +Z21haWwuY29tPokCOAQTAQIAIgUCWeTgKwIbAwYLCQgHAwIGFQgCCQoLBBYCAwEC +HgECF4AACgkQgP2B13A98xsvKhAAn8mouJvwdFp6Pm7BHOpTA1uKcQq0V/nctaDB +2laT3pklv+vFWioviFsTcwAyQ8JW80/Sl+ENRt06vqtUXWOFaJn14mLhASEveur0 +O7itKXePAb77UdoPKvCixnGldycnocxYVekcJJ11YQR1L3cN76dC3E3G9nVZFkXI +TE6unN0XTJ0BRsihZcvE896v7zoWwa4RqaFL2EKiROsQr0WuK345Cj5nhVlISQs1 +jPcg52PYHSomIencNVWvuBWeS6LowBkC/VY3tDyI6JYBpjQxeJN6ctd/U4SLoaDA +EAbkbJqj4bvXVBGBpCdZdsv9RXldGAw8XOSWjEmJDBV5x1KzTyrbqPAlV9GPvV7m +3LiB2fRdkTLt9Z4UnOkOtvOQX9Zo7O5vBMbTFGpJKIqeJ6SWZjol35PQiLJ8vJdh +U4qLLBmK6rTTipDXV74WYjYoO0f9DPBdgpY/fR6JZRmyN4fVVc3fGvJZZDyQv+KT +lnfaXFvlSPGE6QsHO7ghbaKvRla4mUpf9BJq7YWiA3ghLVClHC5erWb4daxpn4vN +nby4Djk2CanndIXwLOL3WDsGaUB/JCIvS8AYkKqk1nCgc4r/8jjUdYIc0Qll1Zn6 +5HBu5ju7gEtVl2QHIs2VI7YhJa2Z6mKUymhwkT3WBDpMDn8rgAu1UsLalhkFReF/ +jVh/mTW5Ag0EWeTgKwEQALdJpNgiIGWYZDj892gKk+/zsRgaMMmvfEEdfpY5mWvr +t8L1NC/A+/K4oQgBaFvJyXHS1FGc3e2Te2Vqi0raoUjSzpX0KcsemeAxbEtpt14A +xZ/LSz9R1PZEgnGwgkk2QaGDJThHAjrLUZRT08NORb35hI33yPhMnfHt/flW7D+B 
+ZtJRk0wD/O+B5RGd73Cn5gHGZmoOzEVw9SmmrIrUS+meZOwIadJ7nu0VkijtpghM +CO1KE/D3ocv4RhtNC0T3xP5HN961kBYO2s+3ddW7uYRlB78mEdFZ/bbIWqedXLGt +IvSm4bEoySfPbmFzGTBAXdUDCMNeh3Xtw5UZaDEdiq4hgnfswfH301QMQJzWQTzp +zY3LhiPGs4fP0IBzjWygIBxktRJKIfQWYE6J9cNC+Xe+mrfpqpwNMsPt78flwghD +sOFmvrwGYnlx8mehnwrW5OIQFqucLVXoDUKi/qmCub5QcJEXmEqfiVlAyDlhrH6L +t8S8lTDP08WJtN9tlRCL/66YUGSGoCGw3R6UUxhk1qhTHU/vlk5RTzO1I/SaVkQs +Lrn0sQwQlunHzhtawne2vof9/kxCzV6vvnrrLPa7c/Y506IVDU54KzMAKdCuUUaK +DmGo+Iu1fweRd5360Put4Fgv21fC1W3/yb0ZMqefoaH4ekIcbQWubAt896K2wZN/ +ABEBAAGJAh8EGAECAAkFAlnk4CsCGwwACgkQgP2B13A98xu7Qg/+MrduvSlvrRka +U93vaumhV25uzjFZnEsMBe61xZp4NrF6BhSdWJgSZXI+8xVblvNcTemIjCuRAXIt +zqmN9O6UOEnRn4B0IOvXPAw/RT+XY6NCLb01Nd7EcIo0cAdQi4hWaPk0caZ5ABSu +Ss5sFzsYyShhGNbo0KKYqGsYc4qz7mOr6QG5/opmi4UWIEr9LZkIjlY/TT6ltSyd +GIVb//KeUobVxEpMe1iET7YB/8Yvad5TWXpcSdIpwfj6l3W3HsLHbx0zhndFuWOY +npLS+f3/SFu60Bq0rmF3W12CmeZTlgtSvJowhhfnnmBIo8cZz+G6EYutmgukd8QE +lD1E967Tbh4aJnKzODhpF/uF+zxwWw7gBvDOTMKblOZBTSMKYutuSuCi3Ysfw02g +XYjMQkUQMJIxj5AoVipGDRJYKCAu508CLsvGJGrJF+nehfsbdd8Kye4SBaOWA94Y +8JuGTFarFyRlKshgRN0qGVs2wo64Se3p9EAvrIwqqyItJw7dTmxXQlkbCWSfEdjM +IjljtjhIMhMLB5rf8BPCZ6og5fKqUF5LOp8DujG2DGa9ZhYWTzOO/UGZP60qGTot +5bm+5Ovl57Yk4UUUSC+Uk+yZ9QOAdOVtbTX+SbmNCUmZ+mTcB6A/XoA5jKsVUyZm +GZZVNUU0hQYfulYDY5E8fJ4Olzpf5OE= +=WmLB +-----END PGP PUBLIC KEY BLOCK----- +pub rsa4096 2017-11-21 [SC] + 331E9A5ED727FADD429B2894F2F1EAB589EBCFB1 +uid [ultimate] Haibin Lin +sig 3 F2F1EAB589EBCFB1 2017-11-21 Haibin Lin +sub rsa4096 2017-11-21 [E] +sig F2F1EAB589EBCFB1 2017-11-21 Haibin Lin + +-----BEGIN PGP PUBLIC KEY BLOCK----- + +mQINBFoTp3YBEACiGa++rsTjQal+33xADuWxzN9L8bTkMu4uFJqYvyNP2z1Q0fcM +DFjLJcvsc3ODSlkDGlkrtFpYlqkBTFERABU19TcAQ5FYFu1uULUybtHm55h6OKAm +1qfSRcKvdidDRytf7XAnhK/jvjtY71EQZUz2OtvKj0p93C22JcaJasKjHEF+8Jv0 +1rvV4BsZcY3hl9ORbv+nvBB6PX6zkpfhh0edVl50yzJEM34dtBZ1CTVlcJhIj0yo +LEZkt+zKEz5C3/D5OgM2DoclUInAvPeIGXvOgoQi9he4YjMppC3fmcA9O+sJ8XFh +dqNxcI+ddcvg84g4ntC2iJb8OOX75xkkoIsJXhZgwxBbdnwINNY6Eqqyx2lMvGRI 
+BLTSxLKsfX/mCmW9mwNrKxfrBIb107ldxwfo+13/Vh45nIlhM0yxfhlukHmYEHp+ +G+T+aD67t0HHZHr27M2x0qTdKkRoI+7xYTUvu+OmObJej48UDhi4GMAjQ61TeLm1 +OyetyMoKpB+Cah1n0O5j6nDPRJBS9OPi361DIZRhlg4IkrbIP5MHs+Zvof8O04xq +GRfYAqEhT6rP98TidpHVhFEV3CrDLVDJLZ3Vqglj2iyNOjEjF1GJJBaFWUoXhKPs +WVZMfgpkaXRwng6r6ieRmmt/Ci//JV6ztkwKk7e0OQJBqbwA0A7lqx7j2QARAQAB +tCVIYWliaW4gTGluIDxsaW5oYWliaW4uZXJpY0BnbWFpbC5jb20+iQJOBBMBCAA4 +FiEEMx6aXtcn+t1CmyiU8vHqtYnrz7EFAloTp3YCGwMFCwkIBwIGFQgJCgsCBBYC +AwECHgECF4AACgkQ8vHqtYnrz7GFWA//Z6YTxtlZSHFlqkAFFOsDtV3DghSC8zJe +LRm508fZn53e9a3fUvT9U1sUfW8DI69GRK+IBkvP5hcmMb1U4N3MxzX4YC/13wMY +3BtUbCIpD8uBJOtuC7fPAH//Ij/4wv4Fp1/3WL6y04+mJIayMyKqmc3nBLD0rVWC +AHEsPR7tiDDMltrzxMNHIJCDaiClJzKiCrQ4owKBOnY2TU/E64xyk5IwAczz2lCY +712h6+q2mO7F672Yt6b6pqmugnFqWdqUj9dx1V9x//4y/k0DefF7G/1Lk1lh4Eyo +aUx3jve/74Y87ICW1AhR2/TvdfWbsAkPyfy98k1SLR/9BulSIXIFeduxaFl7M3D8 +98aB5pqO8tPl2BFUJwh/uywDx0994MjQ8Xvrjmb9WJOAx9OyokivVCvmqJOkBzve +Fk/4KUHTFTGQCoXbbBlIQTC9hBd8c1S4t0gFGbcjlqTvr/ZnTdpSgbzZ/96/SVRm +dYOgjjpkrBOZgJPwsmmRQ2MufeZUtmkFSqdIRLGBNTefsMDDCGvyNeR/XCgM5Zfy +39PX/GHFKgq5Ei2ywEyZOGLCK5MwA12fMExYoedazFFjv6ApGpz+j831A2z/crEo +bRpVvd+rFzGnCKDq5viUD7cRzIPLVltYCNEayEgWta4KI+00/ayaaT6sM7N7oM32 +r01Wv02FvdG5Ag0EWhOndgEQAPiiTvmo9fZNW/5IxL7kDR6u9FEmEb2EZI+KxzbN +RYYY0IPsnA8TY9Rzj9D7xV8Vmf2Pd5SUyCtVwLfBKhadLh755NeehNXWIbW802gH +bvbykL/Zcn98oiLOVfK/Op/6MVpDuGXZ6CpDbQDSn6ne6/CWQnoz1+Wo+wbs1TOy +AhO6xKa20NtGIZrfZD01dSzRC5DMJD3GK1j6HdVUz5piwiTsGvGRJ3ZLfObdlHGn +CTMA39Jb8zQ0QtWPsOre0Nz2JQ53awMBaUhan5MeoOYp6ccsgD1BigyxmKb8iIDN +NM/Iwi0Ib5L4AiGh6fQFf0WF8p74yIn1WgFcWxJXR1ZzvMDDHXqq97SQtbr9FKhu +xrceh/92Ga4ruAJRCbMtmOTUP4APTeT4csANdgJxtW+I4QAp01BQSl75pB2QDlam ++tqePQDboAGc78Ck6096wML0ZMKDDxXPrI67uppuM02FYuJ41ZQjOytigeoGS88g +ByZwPcFIT+5XgtNC0BH7U9VIkiap5U00lykzEjcRjrZTtKqHdeFPbSEpv1QfIcLG +Ra439g9acRHX82sVzhzZk5uu9QKyDN1EpuWoLOaOrICHcMSC7GkVXS8+/7TX0vAN +vn/51fb+tHJekGfaPhsPuIbSba2kmUy8sSS/6JJHkJ1aEFigAPbwUbZTqNlb4IRm +FBVBABEBAAGJAjYEGAEIACAWIQQzHppe1yf63UKbKJTy8eq1ievPsQUCWhOndgIb 
+DAAKCRDy8eq1ievPsbrpEACQ8HqAvq3NuiM00WyHla7VtghCWVEmRozbYc4dR7u+ +sTQrVgbLfgR5zeSWCMHpEcaN/RS58O/i1Dk0DLHTu3NrarzrkEPlHwIgJQ7orxFD +YW3Z2Ytk40uKex4ou/8VzvXTpj1u8d/GHgGdvChBmtw5FaMgc8PBi4FnlIS5cAGU +1ca1RwMX0WpFsp9HgrQLVxgkDs/m7oRSmC5GvPDIpb5S9QFzJKYKTJxSfXXO6hCk +FGAGHWjVC26a/wSUtZQfb3G9sYZJuKUOwr4tpz1y6Ronc34cZYi1FlKWJuz01w4s +4PKjFG/wbYSd+QLfftyyVPMLdY+wCwc8O59QqKx5Rj8HQLxIwSL3chhmdAHCmejM +zKCpkFyLOc6+Wjet6hD6X3EsjIee1AAy22D24EaLJsju9zR/khJFS4K76aQX7dYN +aB3C7S5HGxvYGSqfnn4eBaEzrSOde7HEcqYpYKxS+jB1c4X4W91NSTsqDd0QJMVF +35eKfhWj+X6jWIC+48kfzypXdOCnPbto7wrr40yYCHw3XSXj40H5dWSsWEZVmS+s +Dzz6zy9maHVyXa/rNsL7OjqimtKad65r/wfSFPPIcR1jJfP4GMNHV0TYqxdyDaXg +iEVpHzOV7gd75fJbOvoNxNZj20Yj5sg8OCwbv8PxLXEcBFs7hhjQMhVRsjpNYzAR +Iw== +=rMlc +-----END PGP PUBLIC KEY BLOCK----- diff --git a/LICENSE b/LICENSE index d64569567334..1a02899feead 100644 --- a/LICENSE +++ b/LICENSE @@ -96,6 +96,7 @@ Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works @@ -200,3 +201,227 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. + + ======================================================================= + Apache MXNET (incubating) Subcomponents: + + The Apache MXNET (incubating) project contains subcomponents with separate copyright + notices and license terms. Your use of the source code for the these + subcomponents is subject to the terms and conditions of the following + licenses. + + ======================================================================== + Apache-2.0 licenses + ======================================================================== + + The following components are provided under an Apache 2.0 license. + + 1. MXNet Cpp-package - For details, /cpp-package/LICENSE + 2. 
MXNet rcnn - For details, see, example/rcnn/LICENSE + 3. scala-package - For details, see, scala-package/LICENSE + 4. Warp-CTC - For details, see, src/operator/contrib/ctc_include/LICENSE + 5. dlpack - For details, see, dlpack/LICENSE + 6. dmlc-core - For details, see, dmlc-core/LICENSE + 7. mshadow - For details, see, mshadow/LICENSE + 8. nnvm/dmlc-core - For details, see, nnvm/dmlc-core/LICENSE + 9. nnvm - For details, see, nnvm/LICENSE + 10. nnvm/tvm - For details, see, nnvm/tvm/LICENSE + 11. nnvm/tvm/HalideIR/LICENSE - For details, see, nnvm/tvm/HalideIR/LICENSE + 12. nnvm-fusion - For details, see, nnvm/plugin/nnvm-fusion/LICENSE + 13. ps-lite - For details, see, ps-lite/LICENSE + + ======================================================================== + MIT licenses + ======================================================================== + + 1. Fast R-CNN - For details, see example/rcnn/LICENSE + 2. Faster R-CNN - For details, see example/rcnn/LICENSE + 3. tree_lstm - For details, see example/gluon/tree_lstm/LICENSE + + ======================================================================== + JQuery License (MIT license) + ======================================================================== + jQuery JavaScript Library v1.11.1 + http://jquery.com/ + + Includes Sizzle.js + http://sizzlejs.com/ + + Copyright 2005, 2014 jQuery Foundation, Inc. 
and other contributors + ---- + Released under the MIT license + MIT License + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the "Software"), + to deal in the Software without restriction, including without limitation + the rights to use, copy, modify, merge, publish, distribute, sublicense, + and/or sell copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + ---- + http://jquery.org/license + + Date: 2014-05-01T17:42Z + + ======================================================================== + NVIDIA Licenses + ======================================================================== + + 1. Warp-CTC + For details, see, src/operator/contrib/ctc_include/contrib/moderngpu/LICENSE + + /****************************************************************************** + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + + 2. CUB Library + For details, see, cub/LICENSE.TXT + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the NVIDIA CORPORATION nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + + ======================================================================== + Other Licenses + ======================================================================== + + 1. Caffe + For details, see, example/rcnn/LICENSE + + LICENSE + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + CONTRIBUTION AGREEMENT + + By contributing to the BVLC/caffe repository through pull-request, comment, + or otherwise, the contributor releases their content to the + license and copyright terms herein. + + + 2. MS COCO API + For details, see, example/rcnn/LICENSE + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF TH +E USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + The views and conclusions contained in the software and documentation are those + of the authors and should not be interpreted as representing official policies, + either expressed or implied, of the FreeBSD Project. + + + 3. Sphinx JavaScript utilties for the full-text search + + For details, see, docs/_static/searchtools_custom.js + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + diff --git a/Makefile b/Makefile index 33151e574ea7..ceed6450436c 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,11 @@ ROOTDIR = $(CURDIR) +ifeq ($(OS),Windows_NT) + UNAME_S := Windows +else + UNAME_S := $(shell uname -s) +endif + ifndef config ifdef CXXNET_CONFIG config = $(CXXNET_CONFIG) @@ -36,8 +42,8 @@ include $(config) ifeq ($(USE_MKL2017), 1) # must run ./prepare_mkl before including mshadow.mk - RETURN_STRING = $(shell ./prepare_mkl.sh $(MKLML_ROOT)) - MKLROOT = $(firstword $(RETURN_STRING)) + RETURN_STRING := $(shell ./prepare_mkl.sh $(MKLML_ROOT)) + MKLROOT := $(firstword $(RETURN_STRING)) export USE_MKLML = $(lastword $(RETURN_STRING)) endif @@ -72,9 +78,14 @@ ifeq ($(USE_PROFILER), 1) CFLAGS += -DMXNET_USE_PROFILER=1 endif +# CFLAGS for segfault logger +ifeq ($(USE_SIGNAL_HANDLER), 1) + CFLAGS += -DMXNET_USE_SIGNAL_HANDLER=1 +endif + # Caffe Plugin ifdef CAFFE_PATH - CFLAGS += -DMXNET_USE_CAFFE=1 + CFLAGS += -DMXNET_USE_CAFFE=1 endif ifndef LINT_LANG @@ -91,7 +102,9 @@ else endif ifeq ($(USE_OPENMP), 1) - CFLAGS += -fopenmp + ifneq ($(UNAME_S), Darwin) + CFLAGS += -fopenmp + endif endif ifeq ($(USE_NNPACK), 1) @@ -105,11 +118,21 @@ ifeq ($(USE_MKL2017), 1) CFLAGS += -I$(ROOTDIR)/src/operator/mkl/ CFLAGS += -I$(MKLML_ROOT)/include LDFLAGS += -L$(MKLML_ROOT)/lib -ifeq ($(USE_MKL2017_EXPERIMENTAL), 1) - CFLAGS += -DMKL_EXPERIMENTAL=1 -else - CFLAGS += -DMKL_EXPERIMENTAL=0 + ifeq ($(USE_MKL2017_EXPERIMENTAL), 1) + CFLAGS += 
-DMKL_EXPERIMENTAL=1 + else + CFLAGS += -DMKL_EXPERIMENTAL=0 + endif + ifeq ($(UNAME_S), Darwin) + LDFLAGS += -lmklml + else + LDFLAGS += -Wl,--as-needed -lmklml_intel -lmklml_gnu + endif + LDFLAGS += -liomp5 endif + +ifeq ($(USE_OPERATOR_TUNING), 1) + CFLAGS += -DMXNET_USE_OPERATOR_TUNING=1 endif # verify existence of separate lapack library when using blas/openblas/atlas @@ -146,7 +169,60 @@ ifeq ($(USE_CUDNN), 1) LDFLAGS += -lcudnn endif +# gperftools malloc library (tcmalloc) +ifeq ($(USE_GPERFTOOLS), 1) +# FIND_LIBNAME=tcmalloc_and_profiler + FIND_LIBNAME=tcmalloc + FIND_LIBFILEEXT=so + FIND_LIBFILE=$(wildcard /lib/lib$(FIND_LIBNAME).$(FIND_LIBFILEEXT)) + ifeq (,$(FIND_LIBFILE)) + FIND_LIBFILE=$(wildcard /usr/lib/lib$(FIND_LIBNAME).$(FIND_LIBFILEEXT)) + ifeq (,$(FIND_LIBFILE)) + FIND_LIBFILE=$(wildcard /usr/local/lib/lib$(FIND_LIBNAME).$(FIND_LIBFILEEXT)) + ifeq (,$(FIND_LIBFILE)) + USE_GPERFTOOLS=0 + endif + endif + endif + ifeq ($(USE_GPERFTOOLS), 1) + CFLAGS += -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free + LDFLAGS += $(FIND_LIBFILE) + endif +endif + +# jemalloc malloc library (if not using gperftools) +ifneq ($(USE_GPERFTOOLS), 1) + ifeq ($(USE_JEMALLOC), 1) + FIND_LIBNAME=jemalloc + FIND_LIBFILEEXT=so + FIND_LIBFILE=$(wildcard /lib/lib$(FIND_LIBNAME).$(FIND_LIBFILEEXT)) + ifeq (,$(FIND_LIBFILE)) + FIND_LIBFILE=$(wildcard /usr/lib/lib$(FIND_LIBNAME).$(FIND_LIBFILEEXT)) + ifeq (,$(FIND_LIBFILE)) + FIND_LIBFILE=$(wildcard /usr/local/lib/lib$(FIND_LIBNAME).$(FIND_LIBFILEEXT)) + ifeq (,$(FIND_LIBFILE)) + FIND_LIBFILE=$(wildcard /usr/lib/x86_64-linux-gnu/lib$(FIND_LIBNAME).$(FIND_LIBFILEEXT)) + ifeq (,$(FIND_LIBFILE)) + USE_JEMALLOC=0 + endif + endif + endif + endif + ifeq ($(USE_JEMALLOC), 1) + CFLAGS += -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc \ + -fno-builtin-free -DUSE_JEMALLOC + LDFLAGS += $(FIND_LIBFILE) + endif + endif +endif +# If not using tcmalloc or jemalloc, print a warning (user should consider 
installing) +ifneq ($(USE_GPERFTOOLS), 1) + ifneq ($(USE_JEMALLOC), 1) +$(warning WARNING: Significant performance increases can be achieved by installing and \ +enabling gperftools or jemalloc development packages) + endif +endif ifeq ($(USE_THREADED_ENGINE), 1) CFLAGS += -DMXNET_USE_THREADED_ENGINE @@ -166,8 +242,8 @@ endif # Sets 'CUDA_ARCH', which determines the GPU architectures supported # by the compiled kernels. Users can edit the KNOWN_CUDA_ARCHS list below -# to remove archs they don't wish to support to speed compilation, or they -# can pre-set the CUDA_ARCH args in config.mk for full control. +# to remove archs they don't wish to support to speed compilation, or they can +# pre-set the CUDA_ARCH args in config.mk to a non-null value for full control. # # For archs in this list, nvcc will create a fat-binary that will include # the binaries (SASS) for all architectures supported by the installed version @@ -175,13 +251,13 @@ endif # If these kernels are then run on a newer-architecture GPU, the binary will # be JIT-compiled by the updated driver from the included PTX. ifeq ($(USE_CUDA), 1) -ifeq ($(origin CUDA_ARCH), undefined) +ifeq ($(CUDA_ARCH),) KNOWN_CUDA_ARCHS := 30 35 50 52 60 61 70 # Run nvcc on a zero-length file to check architecture-level support. # Create args to include SASS in the fat binary for supported levels. CUDA_ARCH := $(foreach arch,$(KNOWN_CUDA_ARCHS), \ - $(shell $(NVCC) -arch=sm_$(arch) -E --x cu /dev/null >/dev/null 2>&1 && \ - echo -gencode arch=compute_$(arch),code=sm_$(arch))) + $(shell $(NVCC) -arch=sm_$(arch) -E --x cu /dev/null >/dev/null 2>&1 && \ + echo -gencode arch=compute_$(arch),code=sm_$(arch))) # Convert a trailing "code=sm_NN" to "code=[sm_NN,compute_NN]" to also # include the PTX of the most recent arch in the fat-binaries for # forward compatibility with newer GPUs. @@ -189,7 +265,7 @@ ifeq ($(origin CUDA_ARCH), undefined) # Add fat binary compression if supported by nvcc. 
COMPRESS := --fatbin-options -compress-all CUDA_ARCH += $(shell $(NVCC) -cuda $(COMPRESS) --x cu /dev/null -o /dev/null >/dev/null 2>&1 && \ - echo $(COMPRESS)) + echo $(COMPRESS)) endif endif @@ -231,20 +307,18 @@ PLUGIN_OBJ = PLUGIN_CUOBJ = include $(MXNET_PLUGINS) -# scala package profile -ifeq ($(OS),Windows_NT) +ifeq ($(UNAME_S), Windows) # TODO(yizhi) currently scala package does not support windows SCALA_PKG_PROFILE := windows else - UNAME_S := $(shell uname -s) ifeq ($(UNAME_S), Darwin) WHOLE_ARCH= -all_load NO_WHOLE_ARCH= -noall_load SCALA_PKG_PROFILE := osx-x86_64 else - SCALA_PKG_PROFILE := linux-x86_64 WHOLE_ARCH= --whole-archive NO_WHOLE_ARCH= --no-whole-archive + SCALA_PKG_PROFILE := linux-x86_64 endif endif @@ -255,22 +329,37 @@ ALL_DEP = $(OBJ) $(EXTRA_OBJ) $(PLUGIN_OBJ) $(LIB_DEP) ifeq ($(USE_CUDA), 1) CFLAGS += -I$(ROOTDIR)/cub ALL_DEP += $(CUOBJ) $(EXTRA_CUOBJ) $(PLUGIN_CUOBJ) - LDFLAGS += -lcuda -lcufft + LDFLAGS += -lcuda -lcufft -lnvrtc SCALA_PKG_PROFILE := $(SCALA_PKG_PROFILE)-gpu + ifeq ($(USE_NCCL), 1) + ifneq ($(USE_NCCL_PATH), NONE) + CFLAGS += -I$(USE_NCCL_PATH)/include + LDFLAGS += -L$(USE_NCCL_PATH)/lib + endif + LDFLAGS += -lnccl + CFLAGS += -DMXNET_USE_NCCL=1 + else + CFLAGS += -DMXNET_USE_NCCL=0 + endif else SCALA_PKG_PROFILE := $(SCALA_PKG_PROFILE)-cpu + CFLAGS += -DMXNET_USE_NCCL=0 endif -# For quick compile test, used smaller subset -ALLX_DEP= $(ALL_DEP) - -ifeq ($(USE_NVRTC), 1) - LDFLAGS += -lnvrtc - CFLAGS += -DMXNET_USE_NVRTC=1 +ifeq ($(USE_LIBJPEG_TURBO), 1) + ifneq ($(USE_LIBJPEG_TURBO_PATH), NONE) + CFLAGS += -I$(USE_LIBJPEG_TURBO_PATH)/include + LDFLAGS += -L$(USE_LIBJPEG_TURBO_PATH)/lib + endif + LDFLAGS += -lturbojpeg + CFLAGS += -DMXNET_USE_LIBJPEG_TURBO=1 else - CFLAGS += -DMXNET_USE_NVRTC=0 + CFLAGS += -DMXNET_USE_LIBJPEG_TURBO=0 endif +# For quick compile test, used smaller subset +ALLX_DEP= $(ALL_DEP) + build/src/%.o: src/%.cc @mkdir -p $(@D) $(CXX) -std=c++11 -c $(CFLAGS) -MMD -c $< -o $@ @@ -307,9 +396,9 @@ 
lib/libmxnet.a: $(ALLX_DEP) ar crv $@ $(filter %.o, $?) lib/libmxnet.so: $(ALLX_DEP) - @mkdir -p $(@D) - $(CXX) $(CFLAGS) -shared -o $@ $(filter-out %libnnvm.a, $(filter %.o %.a, $^)) $(LDFLAGS) \ - -Wl,${WHOLE_ARCH} $(filter %libnnvm.a, $^) -Wl,${NO_WHOLE_ARCH} + @mkdir -p $(@D) + $(CXX) $(CFLAGS) -shared -o $@ $(filter-out %libnnvm.a, $(filter %.o %.a, $^)) $(LDFLAGS) \ + -Wl,${WHOLE_ARCH} $(filter %libnnvm.a, $^) -Wl,${NO_WHOLE_ARCH} $(PS_PATH)/build/libps.a: PSLITE @@ -346,7 +435,7 @@ test: $(TEST) lint: cpplint rcpplint jnilint pylint cpplint: - python2 dmlc-core/scripts/lint.py mxnet cpp include src plugin cpp-package tests \ + dmlc-core/scripts/lint.py mxnet cpp include src plugin cpp-package tests \ --exclude_path src/operator/contrib/ctc_include pylint: @@ -378,7 +467,7 @@ cyclean: # R related shortcuts rcpplint: - python2 dmlc-core/scripts/lint.py mxnet-rcpp ${LINT_LANG} R-package/src + dmlc-core/scripts/lint.py mxnet-rcpp ${LINT_LANG} R-package/src rpkg: mkdir -p R-package/inst @@ -400,17 +489,19 @@ rpkg: devtools::install_version('roxygen2',version='5.0.1',\ repo='https://cloud.r-project.org/',quiet=TRUE)}" Rscript -e "require(roxygen2); roxygen2::roxygenise('R-package')" - R CMD build --no-build-vignettes R-package - rm -rf mxnet_current_r.tar.gz + R CMD INSTALL R-package rm -rf R-package/src/image_recordio.h - mv mxnet_*.tar.gz mxnet_current_r.tar.gz rpkgtest: Rscript -e "require(testthat);res<-test_dir('R-package/tests/testthat');if(!testthat:::all_passed(res)){stop('Test failures', call. 
= FALSE)}" +scalaclean: + (cd $(ROOTDIR)/scala-package; \ + mvn clean -P$(SCALA_PKG_PROFILE)) + scalapkg: (cd $(ROOTDIR)/scala-package; \ - mvn clean package -P$(SCALA_PKG_PROFILE) -Dcxx="$(CXX)" \ + mvn package -P$(SCALA_PKG_PROFILE) -Dcxx="$(CXX)" \ -Dcflags="$(CFLAGS)" -Dldflags="$(LDFLAGS)" \ -Dcurrent_libdir="$(ROOTDIR)/lib" \ -Dlddeps="$(LIB_DEP) $(ROOTDIR)/lib/libmxnet.a") @@ -434,7 +525,7 @@ scaladeploy: -Dlddeps="$(LIB_DEP) $(ROOTDIR)/lib/libmxnet.a") jnilint: - python2 dmlc-core/scripts/lint.py mxnet-jnicpp cpp scala-package/native/src + dmlc-core/scripts/lint.py mxnet-jnicpp cpp scala-package/native/src ifneq ($(EXTRA_OPERATORS),) clean: cyclean $(EXTRA_PACKAGES_CLEAN) diff --git a/NEWS.md b/NEWS.md index 4f1ecd15689c..fc6b10188fc7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,11 +1,121 @@ MXNet Change Log ================ -## 0.11.0-rc2 -### - Major Features +## 1.0.0 +### Performance + - Enhanced the performance of `sparse.dot` operator. + - MXNet now automatically set OpenMP to use all available CPU cores to maximize CPU utilization when `NUM_OMP_THREADS` is not set. + - Unary and binary operators now avoid using OpenMP on small arrays if using OpenMP actually hurts performance due to multithreading overhead. + - Significantly improved performance of `broadcast_add`, `broadcast_mul`, etc on CPU. + - Added bulk execution to imperative mode. You can control segment size with `mxnet.engine.bulk`. As a result, the speed of Gluon in hybrid mode is improved, especially on small networks and multiple GPUs. + - Improved speed for `ctypes` invocation from Python frontend. +### New Features - Gradient Compression [Experimental] + - Speed up multi-GPU and distributed training by compressing communication of gradients. This is especially effective when training networks with large fully-connected layers. In Gluon this can be activated with `compression_params` in Trainer. 
+### New Features - Support of NVIDIA Collective Communication Library (NCCL) [Experimental] + - Use `kvstore=’nccl’` for (in some cases) faster training on multiple GPUs. + - Significantly faster than kvstore=’device’ when batch size is small. + - It is recommended to set environment variable `NCCL_LAUNCH_MODE` to `PARALLEL` when using NCCL version 2.1 or newer. +### New Features - Advanced Indexing [General Availability] + - NDArray now supports advanced indexing (both slice and assign) as specified by the numpy standard: https://docs.scipy.org/doc/numpy-1.13.0/reference/arrays.indexing.html#combining-advanced-and-basic-indexing with the following restrictions: + - if key is a list type, only a list of integers is supported, e.g. `key=[1, 2]` is supported, while not for `key=[[1, 2]]`. + - Ellipsis (...) and np.newaxis are not supported. + - `Boolean` array indexing is not supported. +### New Features - Gluon [General Availability] + - Performance optimizations discussed above. + - Added support for loading data in parallel with multiple processes to `gluon.data.DataLoader`. The number of workers can be set with `num_worker`. Does not support windows yet. + - Added Block.cast to support networks with different data types, e.g. `float16`. + - Added Lambda block for wrapping a user defined function as a block. + - Generalized `gluon.data.ArrayDataset` to support arbitrary number of arrays. +### New Features - ARM / Raspberry Pi support [Experimental] + - MXNet now compiles and runs on ARMv6, ARMv7, ARMv64 including Raspberry Pi devices. See https://github.com/apache/incubator-mxnet/tree/master/docker_multiarch for more information. +### New Features - NVIDIA Jetson support [Experimental] + - MXNet now compiles and runs on NVIDIA Jetson TX2 boards with GPU acceleration. + - You can install the python MXNet package on a Jetson board by running - `$ pip install mxnet-jetson-tx2`. 
+### New Features - Sparse Tensor Support [General Availability] + - Added more sparse operators: `contrib.SparseEmbedding`, `sparse.sum` and `sparse.mean`. + - Added `asscipy()` for easier conversion to scipy. + - Added `check_format()` for sparse ndarrays to check if the array format is valid. +### Bug-fixes + - Fixed a bug where `a[-1]` indexing did not work on `NDArray`. + - Fixed `expand_dims` if axis < 0. + - Fixed a bug that causes topk to produce incorrect results on large arrays. + - Improved numerical precision of unary and binary operators for `float64` data. + - Fixed derivatives of log2 and log10. They used to be the same as log. + - Fixed a bug that causes MXNet to hang after fork. Note that you still cannot use GPU in child processes after fork due to limitations of CUDA. + - Fixed a bug that causes `CustomOp` to fail when using auxiliary states. + - Fixed a security bug that is causing MXNet to listen on all available interfaces when running training in distributed mode. +### Doc Updates + - Added a security best practices document under FAQ section. + - Fixed License Headers including restoring copyright attributions. + - Documentation updates. + - Links for viewing source. + + For more information and examples, see [full release notes](https://cwiki.apache.org/confluence/display/MXNET/Apache+MXNet+%28incubating%29+1.0+Release+Notes) + + +## 0.12.1 +### Bug-fixes + - Added GPU support for the `syevd` operator which ensures that there is GPU support for all linalg-operators. + - Bugfix for `syevd` on CPU such that it works for `float32`. + - Fixed API call when `OMP_NUM_THREADS` environment variable is set. + - Fixed `MakeNonlossGradNode` bug. + - Fixed bug related to passing `dtype` to `array()`. + - Fixed some minor bugs for sparse distributed training. + - Fixed a bug on `Slice` accessing uninitialized memory in `param.begin` in the file `matrix_op-inl.h`. + - Fixed `gluon.data.RecordFileDataset`.
+ - Fixed a bug that caused `autograd` to crash on some networks. + + +## 0.12.0 +### Performance + - Added full support for NVIDIA Volta GPU Architecture and CUDA 9. Training CNNs is up to 3.5x faster than Pascal when using float16 precision. + - Enabled JIT compilation. Autograd and Gluon hybridize now use less memory and have faster speed. Performance is almost the same as old symbolic style code. + - Improved ImageRecordIO image loading performance and added indexed RecordIO support. + - Added better openmp thread management to improve CPU performance. +### New Features - Gluon + - Added enhancements to the Gluon package, a high-level interface designed to be easy to use while keeping most of the flexibility of low level API. Gluon supports both imperative and symbolic programming, making it easy to train complex models imperatively with minimal impact on performance. Neural networks (and other machine learning models) can be defined and trained with `gluon.nn` and `gluon.rnn` packages. + - Added new loss functions - `SigmoidBinaryCrossEntropyLoss`, `CTCLoss`, `HuberLoss`, `HingeLoss`, `SquaredHingeLoss`, `LogisticLoss`, `TripletLoss`. + - `gluon.Trainer` now allows reading and setting learning rate with `trainer.learning_rate` property. + - Added API `HybridBlock.export` for exporting gluon models to MXNet format. + - Added `gluon.contrib` package. + - Convolutional recurrent network cells for RNN, LSTM and GRU. + - `VariationalDropoutCell` +### New Features - Autograd + - Added enhancements to `autograd` package, which enables automatic differentiation of NDArray operations. + - `autograd.Function` allows defining both forward and backward computation for custom operators. + - Added `mx.autograd.grad` and experimental second order gradient support (most operators don't support second order gradient yet). + - Autograd now supports cross-device graphs. Use `x.copyto(mx.gpu(i))` and `x.copyto(mx.cpu())` to do computation on multiple devices.
+### New Features - Sparse Tensor Support + - Added support for sparse matrices. + - Added limited cpu support for two sparse formats in `Symbol` and `NDArray` - `CSRNDArray` and `RowSparseNDArray`. + - Added a sparse dot product operator and many element-wise sparse operators. + - Added a data iterator for sparse data input - `LibSVMIter`. + - Added three optimizers for sparse gradient updates: `Ftrl`, `SGD` and `Adam`. + - Added `push` and `row_sparse_pull` with `RowSparseNDArray` in distributed kvstore. +### Other New Features + - Added limited support for fancy indexing, which allows you to very quickly access and modify complicated subsets of an array's values. `x[idx_arr0, idx_arr1, ..., idx_arrn]` is now supported. Features such as combining and slicing are planned for the next release. Checkout master to get a preview. + - Random number generators in `mx.nd.random.*` and `mx.sym.random.*` now support both CPU and GPU. + - `NDArray` and `Symbol` now support "fluent" methods. You can now use `x.exp()` etc instead of `mx.nd.exp(x)` or `mx.sym.exp(x)`. + - Added `mx.rtc.CudaModule` for writing and running CUDA kernels from python. + - Added `multi_precision` option to optimizer for easier float16 training. + - Better support for IDE auto-completion. IDEs like PyCharm can now correctly parse mxnet operators. +### API Changes + - Operators like `mx.sym.linalg_*` and `mx.sym.random_*` are now moved to `mx.sym.linalg.*` and `mx.sym.random.*`. The old names are still available but deprecated. + - `sample_*` and `random_*` are now merged as `random.*`, which supports both scalar and `NDArray` distribution parameters. +### Bug-fixes + - Fixed a bug that causes `argsort` operator to fail on large tensors. + - Fixed numerical stability issues when summing large tensors. + - Fixed a bug that causes arange operator to output wrong results for large ranges. + - Improved numerical precision for unary and binary operators on `float64` inputs.
+ +For more information and examples, see [full release notes](https://cwiki.apache.org/confluence/display/MXNET/MXNet+0.12.0+Release+Notes) + + +## 0.11.0 +### Major Features - Apple Core ML model converter - Support for Keras v1.2.2 - For more information see [full release notes](https://cwiki.apache.org/confluence/display/MXNET/v0.11.0+Release+Notes) -### - API Changes +### API Changes - Added `CachedOp`. You can now cache the operators that’s called frequently with the same set of arguments to reduce overhead. - Added sample_multinomial for sampling from multinomial distributions. - Added `trunc` operator for rounding towards zero. @@ -16,9 +126,9 @@ MXNet Change Log - `allow_extra` is added to Module.set_params to ignore extra parameters. - Added `mod` operator for modulo. - Added `multi_precision` option to SGD optimizer to improve training with float16. Resnet50 now achieves the same accuracy when trained with float16 and gives 50% speedup on Titan XP. -### - Performance Improvements +### Performance Improvements - ImageRecordIter now stores data in pinned memory to improve GPU memcopy speed. -### - Bugfixes +### Bugfixes - Cython interface is fixed. `make cython` and `python setup.py install --with-cython` should install the cython interface and reduce overhead in applications that use imperative/bucketing. - Fixed various bugs in Faster-RCNN example: https://github.com/dmlc/mxnet/pull/6486 - Fixed various bugs in SSD example. @@ -28,7 +138,7 @@ MXNet Change Log - Fixed context mismatch when loading optimizer states. - Fixed a bug in ReLU activation when using MKL. - Fixed a few race conditions that causes crashes on shutdown. -### - Refactors +### Refactors - Refactored TShape/TBlob to use int64 dimensions and DLTensor as internal storage. Getting ready for migration to DLPack. As a result TBlob::dev_mask_ and TBlob::stride_ are removed. 
diff --git a/NOTICE b/NOTICE index 03695607e3e9..d5327226ae6c 100644 --- a/NOTICE +++ b/NOTICE @@ -1,5 +1,13 @@ -Apache MXNET (incubating) -Copyright [2015-2017] The Apache Software Foundation + Apache MXNET (incubating) + Copyright 2017- The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + + + + + + -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index e0b435513718..6e0f93294bf7 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -1,7 +1,7 @@ Package: mxnet Type: Package Title: MXNet: A Flexible and Efficient Machine Learning Library for Heterogeneous Distributed Systems -Version: 0.11.0 +Version: 1.0.0 Date: 2017-06-27 Author: Tianqi Chen, Qiang Kou, Tong He Maintainer: Qiang Kou diff --git a/R-package/R/gru.R b/R-package/R/gru.R deleted file mode 100644 index d2ffd9a414c2..000000000000 --- a/R-package/R/gru.R +++ /dev/null @@ -1,355 +0,0 @@ -# gru cell symbol -gru <- function(num.hidden, indata, prev.state, param, seqidx, layeridx, dropout=0) { - if (dropout > 0) - indata <- mx.symbol.Dropout(data=indata, p=dropout) - i2h <- mx.symbol.FullyConnected(data=indata, - weight=param$gates.i2h.weight, - bias=param$gates.i2h.bias, - num.hidden=num.hidden * 2, - name=paste0("t", seqidx, ".l", layeridx, ".gates.i2h")) - h2h <- mx.symbol.FullyConnected(data=prev.state$h, - weight=param$gates.h2h.weight, - bias=param$gates.h2h.bias, - num.hidden=num.hidden * 2, - name=paste0("t", seqidx, ".l", layeridx, ".gates.h2h")) - gates <- i2h + h2h - slice.gates <- mx.symbol.SliceChannel(gates, num.outputs=2, - name=paste0("t", seqidx, ".l", layeridx, ".slice")) - update.gate <- mx.symbol.Activation(slice.gates[[1]], act.type="sigmoid") - reset.gate <- mx.symbol.Activation(slice.gates[[2]], act.type="sigmoid") - - htrans.i2h <- mx.symbol.FullyConnected(data=indata, - 
weight=param$trans.i2h.weight, - bias=param$trans.i2h.bias, - num.hidden=num.hidden, - name=paste0("t", seqidx, ".l", layeridx, ".trans.i2h")) - h.after.reset <- prev.state$h * reset.gate - htrans.h2h <- mx.symbol.FullyConnected(data=h.after.reset, - weight=param$trans.h2h.weight, - bias=param$trans.h2h.bias, - num.hidden=num.hidden, - name=paste0("t", seqidx, ".l", layeridx, ".trans.h2h")) - h.trans <- htrans.i2h + htrans.h2h - h.trans.active <- mx.symbol.Activation(h.trans, act.type="tanh") - next.h <- prev.state$h + update.gate * (h.trans.active - prev.state$h) - return (list(h=next.h)) -} - -# unrolled gru network -gru.unroll <- function(num.gru.layer, seq.len, input.size, - num.hidden, num.embed, num.label, dropout=0) { - embed.weight <- mx.symbol.Variable("embed.weight") - cls.weight <- mx.symbol.Variable("cls.weight") - cls.bias <- mx.symbol.Variable("cls.bias") - param.cells <- lapply(1:num.gru.layer, function(i) { - cell <- list(gates.i2h.weight = mx.symbol.Variable(paste0("l", i, ".gates.i2h.weight")), - gates.i2h.bias = mx.symbol.Variable(paste0("l", i, ".gates.i2h.bias")), - gates.h2h.weight = mx.symbol.Variable(paste0("l", i, ".gates.h2h.weight")), - gates.h2h.bias = mx.symbol.Variable(paste0("l", i, ".gates.h2h.bias")), - trans.i2h.weight = mx.symbol.Variable(paste0("l", i, ".trans.i2h.weight")), - trans.i2h.bias = mx.symbol.Variable(paste0("l", i, ".trans.i2h.bias")), - trans.h2h.weight = mx.symbol.Variable(paste0("l", i, ".trans.h2h.weight")), - trans.h2h.bias = mx.symbol.Variable(paste0("l", i, ".trans.h2h.bias"))) - return (cell) - }) - last.states <- lapply(1:num.gru.layer, function(i) { - state <- list(h=mx.symbol.Variable(paste0("l", i, ".init.h"))) - return (state) - }) - - # embeding layer - label <- mx.symbol.Variable("label") - data <- mx.symbol.Variable("data") - embed <- mx.symbol.Embedding(data=data, input.dim=input.size, - weight=embed.weight, output.dim=num.embed, name='embed') - wordvec <- mx.symbol.SliceChannel(data=embed, 
num.outputs=seq.len, squeeze.axis=1) - - last.hidden <- list() - for (seqidx in 1:seq.len) { - hidden <- wordvec[[seqidx]] - # stack GRU - for (i in 1:num.gru.layer) { - dp <- ifelse(i==1, 0, dropout) - next.state <- gru(num.hidden, indata=hidden, - prev.state=last.states[[i]], - param=param.cells[[i]], - seqidx=seqidx, layeridx=i, - dropout=dp) - hidden <- next.state$h - last.states[[i]] <- next.state - } - # decoder - if (dropout > 0) - hidden <- mx.symbol.Dropout(data=hidden, p=dropout) - last.hidden <- c(last.hidden, hidden) - } - last.hidden$dim <- 0 - last.hidden$num.args <- seq.len - concat <-mxnet:::mx.varg.symbol.Concat(last.hidden) - fc <- mx.symbol.FullyConnected(data=concat, - weight=cls.weight, - bias=cls.bias, - num.hidden=num.label) - - label <- mx.symbol.transpose(data=label) - label <- mx.symbol.Reshape(data=label, target.shape=c(0)) - - loss.all <- mx.symbol.SoftmaxOutput(data=fc, label=label, name="sm") - return (loss.all) -} - -# gru inference model symbol -gru.inference.symbol <- function(num.gru.layer, seq.len, input.size, - num.hidden, num.embed, num.label, dropout=0) { - seqidx <- 1 - embed.weight <- mx.symbol.Variable("embed.weight") - cls.weight <- mx.symbol.Variable("cls.weight") - cls.bias <- mx.symbol.Variable("cls.bias") - - param.cells <- lapply(1:num.gru.layer, function(i) { - cell <- list(gates.i2h.weight = mx.symbol.Variable(paste0("l", i, ".gates.i2h.weight")), - gates.i2h.bias = mx.symbol.Variable(paste0("l", i, ".gates.i2h.bias")), - gates.h2h.weight = mx.symbol.Variable(paste0("l", i, ".gates.h2h.weight")), - gates.h2h.bias = mx.symbol.Variable(paste0("l", i, ".gates.h2h.bias")), - trans.i2h.weight = mx.symbol.Variable(paste0("l", i, ".trans.i2h.weight")), - trans.i2h.bias = mx.symbol.Variable(paste0("l", i, ".trans.i2h.bias")), - trans.h2h.weight = mx.symbol.Variable(paste0("l", i, ".trans.h2h.weight")), - trans.h2h.bias = mx.symbol.Variable(paste0("l", i, ".trans.h2h.bias"))) - return (cell) - }) - last.states <- 
lapply(1:num.gru.layer, function(i) { - state <- list(h=mx.symbol.Variable(paste0("l", i, ".init.h"))) - return (state) - }) - - # embeding layer - data <- mx.symbol.Variable("data") - hidden <- mx.symbol.Embedding(data=data, input_dim=input.size, - weight=embed.weight, output_dim=num.embed, name="embed") - - # stack GRU - for (i in 1:num.gru.layer) { - dp <- ifelse(i==1, 0, dropout) - next.state <- gru(num.hidden, indata=hidden, - prev.state=last.states[[i]], - param=param.cells[[i]], - seqidx=seqidx, layeridx=i, - dropout=dp) - hidden <- next.state$h - last.states[[i]] <- next.state - } - # decoder - if (dropout > 0) - hidden <- mx.symbol.Dropout(data=hidden, p=dropout) - - fc <- mx.symbol.FullyConnected(data=hidden, num_hidden=num.label, - weight=cls.weight, bias=cls.bias, name='pred') - sm <- mx.symbol.SoftmaxOutput(data=fc, name='sm') - unpack.h <- lapply(1:num.gru.layer, function(i) { - state <- last.states[[i]] - state.h <- mx.symbol.BlockGrad(state$h, name=paste0("l", i, ".last.h")) - return (state.h) - }) - - list.all <- c(sm, unpack.h) - return (mx.symbol.Group(list.all)) -} - -#' Training GRU Unrolled Model -#' -#' @param train.data mx.io.DataIter or list(data=R.array, label=R.array) -#' The Training set. -#' @param eval.data mx.io.DataIter or list(data=R.array, label=R.array), optional -#' The validation set used for validation evaluation during the progress. -#' @param num.gru.layer integer -#' The number of the layer of gru. -#' @param seq.len integer -#' The length of the input sequence. -#' @param num.hidden integer -#' The number of hidden nodes. -#' @param num.embed integer -#' The output dim of embedding. -#' @param num.label integer -#' The number of labels. -#' @param batch.size integer -#' The batch size used for R array training. -#' @param input.size integer -#' The input dim of one-hot encoding of embedding -#' @param ctx mx.context, optional -#' The device used to perform training. 
-#' @param num.round integer, default=10 -#' The number of iterations over training data to train the model. -#' @param update.period integer, default=1 -#' The number of iterations to update parameters during training period. -#' @param initializer initializer object. default=mx.init.uniform(0.01) -#' The initialization scheme for parameters. -#' @param dropout float, default=0 -#' A number in [0,1) containing the dropout ratio from the last hidden layer to the output layer. -#' @param optimizer string, default="sgd" -#' The optimization method. -#' @param ... other parameters passing to \code{mx.gru}/. -#' @return model A trained gru unrolled model. -#' -#' @export -mx.gru <- function( train.data, eval.data=NULL, - num.gru.layer, seq.len, - num.hidden, num.embed, num.label, - batch.size, input.size, - ctx=mx.ctx.default(), - num.round=10, update.period=1, - initializer=mx.init.uniform(0.01), - dropout=0, optimizer='sgd', - ...) { - # check data and change data into iterator - train.data <- check.data(train.data, batch.size, TRUE) - eval.data <- check.data(eval.data, batch.size, FALSE) - - # get unrolled gru symbol - rnn.sym <- gru.unroll( num.gru.layer=num.gru.layer, - num.hidden=num.hidden, - seq.len=seq.len, - input.size=input.size, - num.embed=num.embed, - num.label=num.label, - dropout=dropout) - - init.states.name <- lapply(1:num.gru.layer, function(i) { - state.h <- paste0("l", i, ".init.h") - return (state.h) - }) - - # set up gru model - model <- setup.rnn.model(rnn.sym=rnn.sym, - ctx=ctx, - num.rnn.layer=num.gru.layer, - seq.len=seq.len, - num.hidden=num.hidden, - num.embed=num.embed, - num.label=num.label, - batch.size=batch.size, - input.size=input.size, - init.states.name=init.states.name, - initializer=initializer, - dropout=dropout) - - # train gru model - model <- train.rnn( model, train.data, eval.data, - num.round=num.round, - update.period=update.period, - ctx=ctx, - init.states.name=init.states.name, - ...) 
- # change model into MXFeedForwardModel - model <- list(symbol=model$symbol, arg.params=model$rnn.exec$ref.arg.arrays, aux.params=model$rnn.exec$ref.aux.arrays) - return(structure(model, class="MXFeedForwardModel")) -} - -#' Create a GRU Inference Model -#' -#' @param num.gru.layer integer -#' The number of the layer of gru. -#' @param input.size integer -#' The input dim of one-hot encoding of embedding -#' @param num.hidden integer -#' The number of hidden nodes. -#' @param num.embed integer -#' The output dim of embedding. -#' @param num.label integer -#' The number of labels. -#' @param batch.size integer, default=1 -#' The batch size used for R array training. -#' @param arg.params list -#' The batch size used for R array training. -#' @param ctx mx.context, optional -#' Model parameter, list of name to NDArray of net's weights. -#' @param dropout float, default=0 -#' A number in [0,1) containing the dropout ratio from the last hidden layer to the output layer. -#' @return model list(rnn.exec=integer, symbol=mxnet symbol, num.rnn.layer=integer, num.hidden=integer, seq.len=integer, batch.size=integer, num.embed=integer) -#' A gru inference model. -#' -#' @export -mx.gru.inference <- function(num.gru.layer, - input.size, - num.hidden, - num.embed, - num.label, - batch.size=1, - arg.params, - ctx=mx.cpu(), - dropout=0.) 
{ - sym <- gru.inference.symbol(num.gru.layer=num.gru.layer, - input.size=input.size, - num.hidden=num.hidden, - num.embed=num.embed, - num.label=num.label, - dropout=dropout) - - init.states.name <- lapply(1:num.gru.layer, function(i) { - state.h <- paste0("l", i, ".init.h") - return (state.h) - }) - - seq.len <- 1 - # set up gru model - model <- setup.rnn.model(rnn.sym=sym, - ctx=ctx, - num.rnn.layer=num.gru.layer, - seq.len=seq.len, - num.hidden=num.hidden, - num.embed=num.embed, - num.label=num.label, - batch.size=batch.size, - input.size=input.size, - init.states.name=init.states.name, - initializer=mx.init.uniform(0.01), - dropout=dropout) - arg.names <- names(model$rnn.exec$ref.arg.arrays) - for (k in names(arg.params)) { - if ((k %in% arg.names) && is.param.name(k) ) { - rnn.input <- list() - rnn.input[[k]] <- arg.params[[k]] - mx.exec.update.arg.arrays(model$rnn.exec, rnn.input, match.name=TRUE) - } - } - init.states <- list() - for (i in 1:num.gru.layer) { - init.states[[paste0("l", i, ".init.h")]] <- model$rnn.exec$ref.arg.arrays[[paste0("l", i, ".init.h")]]*0 - } - mx.exec.update.arg.arrays(model$rnn.exec, init.states, match.name=TRUE) - - return (model) -} - -#' Using forward function to predict in gru inference model -#' -#' @param model gru model -#' A gru inference model -#' @param input.data, array.matrix -#' The input data for forward function -#' @param new.seq boolean, default=FALSE -#' Whether the input is the start of a new sequence -#' -#' @return result A list(prob=prob, model=model) containing the result probability of each label and the model. 
-#' -#' @export -mx.gru.forward <- function(model, input.data, new.seq=FALSE) { - if (new.seq == TRUE) { - init.states <- list() - for (i in 1:model$num.rnn.layer) { - init.states[[paste0("l", i, ".init.h")]] <- model$rnn.exec$ref.arg.arrays[[paste0("l", i, ".init.h")]]*0 - } - mx.exec.update.arg.arrays(model$rnn.exec, init.states, match.name=TRUE) - } - dim(input.data) <- c(model$batch.size) - data <- list(data=mx.nd.array(input.data)) - mx.exec.update.arg.arrays(model$rnn.exec, data, match.name=TRUE) - mx.exec.forward(model$rnn.exec, is.train=FALSE) - init.states <- list() - for (i in 1:model$num.rnn.layer) { - init.states[[paste0("l", i, ".init.h")]] <- model$rnn.exec$ref.outputs[[paste0("l", i, ".last.h_output")]] - } - mx.exec.update.arg.arrays(model$rnn.exec, init.states, match.name=TRUE) - prob <- model$rnn.exec$ref.outputs[["sm_output"]] - return (list(prob=prob, model=model)) -} - diff --git a/R-package/R/initializer.R b/R-package/R/initializer.R index 7a1ffb2b182a..9f5e75be91a5 100644 --- a/R-package/R/initializer.R +++ b/R-package/R/initializer.R @@ -4,11 +4,11 @@ #' @param shape the shape of the array to be generated. 
#' mx.init.internal.default <- function(name, shape, ctx, allow.unknown=FALSE) { - if (endsWith(name, "bias")) return (mx.nd.zeros(shape, ctx)) - if (endsWith(name, "gamma")) return (mx.nd.ones(shape, ctx)) - if (endsWith(name, "beta")) return (mx.nd.zeros(shape, ctx)) - if (endsWith(name, "moving_mean")) return (mx.nd.zeros(shape, ctx)) - if (endsWith(name, "moving_var")) return (mx.nd.ones(shape, ctx)) + if (endsWith(name, "bias")) return (mx.nd.zeros(shape)) + if (endsWith(name, "gamma")) return (mx.nd.ones(shape)) + if (endsWith(name, "beta")) return (mx.nd.zeros(shape)) + if (endsWith(name, "moving_mean")) return (mx.nd.zeros(shape)) + if (endsWith(name, "moving_var")) return (mx.nd.ones(shape)) if (allow.unknown) return(NULL) stop(paste("Unkown initialization pattern for ", name)) } @@ -21,9 +21,9 @@ mx.init.internal.default <- function(name, shape, ctx, allow.unknown=FALSE) { mx.init.uniform <- function(scale) { function(name, shape, ctx, allow.unknown=FALSE) { if (!endsWith(name, "weight")) { - return (mx.init.internal.default(name, shape, ctx, allow.unknown)) + return (mx.init.internal.default(name = name, shape = shape, allow.unknown = allow.unknown)) } - return (mx.runif(shape, -scale, scale, ctx)) + return (mx.nd.random.uniform(low = -scale, high = scale, shape = shape)) } } @@ -35,9 +35,9 @@ mx.init.uniform <- function(scale) { mx.init.normal <- function(sd) { function(name, shape, ctx, allow.unknown=FALSE) { if (!endsWith(name, "weight")) { - return (mx.init.internal.default(name, shape, ctx, allow.unknown)) + return (mx.init.internal.default(name = name, shape = shape, allow.unknown = allow.unknown)) } - return (mx.rnorm(shape, 0, sd, ctx)) + return (mx.nd.random.normal(loc = 0, scale = sd, shape = shape)) } } @@ -56,9 +56,9 @@ mx.init.Xavier <- function(rnd_type = "uniform", factor_type = "avg", magnitude = 3){ function(name, shape, ctx, allow.unknown = FALSE){ if (!endsWith(name, "weight")) { - return (mx.init.internal.default(name, shape, ctx, 
allow.unknown)) + return (mx.init.internal.default(name = name, shape = shape, allow.unknown = allow.unknown)) } - + fan_out = shape[length(shape)] fan_in = prod(shape[-length(shape)]) factor_val = 1 @@ -71,13 +71,13 @@ mx.init.Xavier <- function(rnd_type = "uniform", factor_type = "avg", } else { stop("Not supported factor type. See usage of function mx.init.Xavier") } - + scale = sqrt(magnitude / factor_val) - + if (rnd_type == "uniform"){ - return(mx.runif(shape, -scale, scale, ctx)) + return(mx.nd.random.uniform(low = -scale, high = scale, shape = shape)) } else if (rnd_type == "gaussian"){ - return(mx.rnorm(shape, 0, scale, ctx)) + return(mx.nd.random.normal(loc = 0, scale = scale, shape = shape)) } else { stop("Not supported random type. See usage of function mx.init.Xavier") } @@ -92,7 +92,7 @@ mx.init.Xavier <- function(rnd_type = "uniform", factor_type = "avg", #' @param ctx mx.context The context of the weights #' @param skip.unknown Whether skip the unknown weight types #' @export -mx.init.create <- function(initializer, shape.array, ctx, skip.unknown=TRUE) { +mx.init.create <- function(initializer, shape.array, ctx=NULL, skip.unknown=TRUE) { if (length(shape.array) == 0) return(list()) names = names(shape.array) ret <- lapply(1 : length(names), function(i) { diff --git a/R-package/R/lstm.R b/R-package/R/lstm.R deleted file mode 100644 index 622388993c8c..000000000000 --- a/R-package/R/lstm.R +++ /dev/null @@ -1,388 +0,0 @@ -# lstm cell symbol -lstm <- function(num.hidden, indata, prev.state, param, seqidx, layeridx, dropout=0) { - if (dropout > 0) - indata <- mx.symbol.Dropout(data=indata, p=dropout) - i2h <- mx.symbol.FullyConnected(data=indata, - weight=param$i2h.weight, - bias=param$i2h.bias, - num.hidden=num.hidden * 4, - name=paste0("t", seqidx, ".l", layeridx, ".i2h")) - h2h <- mx.symbol.FullyConnected(data=prev.state$h, - weight=param$h2h.weight, - bias=param$h2h.bias, - num.hidden=num.hidden * 4, - name=paste0("t", seqidx, ".l", layeridx, 
".h2h")) - gates <- i2h + h2h - slice.gates <- mx.symbol.SliceChannel(gates, num.outputs=4, - name=paste0("t", seqidx, ".l", layeridx, ".slice")) - - in.gate <- mx.symbol.Activation(slice.gates[[1]], act.type="sigmoid") - in.transform <- mx.symbol.Activation(slice.gates[[2]], act.type="tanh") - forget.gate <- mx.symbol.Activation(slice.gates[[3]], act.type="sigmoid") - out.gate <- mx.symbol.Activation(slice.gates[[4]], act.type="sigmoid") - next.c <- (forget.gate * prev.state$c) + (in.gate * in.transform) - next.h <- out.gate * mx.symbol.Activation(next.c, act.type="tanh") - - return (list(c=next.c, h=next.h)) -} - -# unrolled lstm network -lstm.unroll <- function(num.lstm.layer, seq.len, input.size, - num.hidden, num.embed, num.label, dropout=0.) { - - embed.weight <- mx.symbol.Variable("embed.weight") - cls.weight <- mx.symbol.Variable("cls.weight") - cls.bias <- mx.symbol.Variable("cls.bias") - - param.cells <- lapply(1:num.lstm.layer, function(i) { - cell <- list(i2h.weight = mx.symbol.Variable(paste0("l", i, ".i2h.weight")), - i2h.bias = mx.symbol.Variable(paste0("l", i, ".i2h.bias")), - h2h.weight = mx.symbol.Variable(paste0("l", i, ".h2h.weight")), - h2h.bias = mx.symbol.Variable(paste0("l", i, ".h2h.bias"))) - return (cell) - }) - last.states <- lapply(1:num.lstm.layer, function(i) { - state <- list(c=mx.symbol.Variable(paste0("l", i, ".init.c")), - h=mx.symbol.Variable(paste0("l", i, ".init.h"))) - return (state) - }) - - # embeding layer - label <- mx.symbol.Variable("label") - data <- mx.symbol.Variable("data") - embed <- mx.symbol.Embedding(data=data, input_dim=input.size, - weight=embed.weight, output_dim=num.embed, name="embed") - wordvec <- mx.symbol.SliceChannel(data=embed, num_outputs=seq.len, squeeze_axis=1) - - last.hidden <- list() - for (seqidx in 1:seq.len) { - hidden <- wordvec[[seqidx]] - # stack lstm - for (i in 1:num.lstm.layer) { - dp <- ifelse(i==1, 0, dropout) - next.state <- lstm(num.hidden, indata=hidden, - 
prev.state=last.states[[i]], - param=param.cells[[i]], - seqidx=seqidx, layeridx=i, - dropout=dp) - hidden <- next.state$h - last.states[[i]] <- next.state - } - # decoder - if (dropout > 0) - hidden <- mx.symbol.Dropout(data=hidden, p=dropout) - last.hidden <- c(last.hidden, hidden) - } - last.hidden$dim <- 0 - last.hidden$num.args <- seq.len - concat <-mxnet:::mx.varg.symbol.Concat(last.hidden) - fc <- mx.symbol.FullyConnected(data=concat, - weight=cls.weight, - bias=cls.bias, - num.hidden=num.label) - - label <- mx.symbol.transpose(data=label) - label <- mx.symbol.Reshape(data=label, target.shape=c(0)) - - loss.all <- mx.symbol.SoftmaxOutput(data=fc, label=label, name="sm") - return (loss.all) -} - -# lstm inference model symbol -lstm.inference.symbol <- function(num.lstm.layer, input.size, - num.hidden, num.embed, num.label, dropout=0.) { - seqidx <- 0 - embed.weight <- mx.symbol.Variable("embed.weight") - cls.weight <- mx.symbol.Variable("cls.weight") - cls.bias <- mx.symbol.Variable("cls.bias") - - param.cells <- lapply(1:num.lstm.layer, function(i) { - cell <- list(i2h.weight = mx.symbol.Variable(paste0("l", i, ".i2h.weight")), - i2h.bias = mx.symbol.Variable(paste0("l", i, ".i2h.bias")), - h2h.weight = mx.symbol.Variable(paste0("l", i, ".h2h.weight")), - h2h.bias = mx.symbol.Variable(paste0("l", i, ".h2h.bias"))) - return (cell) - }) - last.states <- lapply(1:num.lstm.layer, function(i) { - state <- list(c=mx.symbol.Variable(paste0("l", i, ".init.c")), - h=mx.symbol.Variable(paste0("l", i, ".init.h"))) - return (state) - }) - - # embeding layer - data <- mx.symbol.Variable("data") - hidden <- mx.symbol.Embedding(data=data, input_dim=input.size, - weight=embed.weight, output_dim=num.embed, name="embed") - - # stack lstm - for (i in 1:num.lstm.layer) { - dp <- ifelse(i==1, 0, dropout) - next.state <- lstm(num.hidden, indata=hidden, - prev.state=last.states[[i]], - param=param.cells[[i]], - seqidx=seqidx, layeridx=i, - dropout=dp) - hidden <- next.state$h - 
last.states[[i]] <- next.state - } - # decoder - if (dropout > 0) - hidden <- mx.symbol.Dropout(data=hidden, p=dropout) - - fc <- mx.symbol.FullyConnected(data=hidden, num_hidden=num.label, - weight=cls.weight, bias=cls.bias, name='pred') - sm <- mx.symbol.SoftmaxOutput(data=fc, name='sm') - unpack.c <- lapply(1:num.lstm.layer, function(i) { - state <- last.states[[i]] - state.c <- mx.symbol.BlockGrad(state$c, name=paste0("l", i, ".last.c")) - return (state.c) - }) - unpack.h <- lapply(1:num.lstm.layer, function(i) { - state <- last.states[[i]] - state.h <- mx.symbol.BlockGrad(state$h, name=paste0("l", i, ".last.h")) - return (state.h) - }) - - list.all <- c(sm, unpack.c, unpack.h) - return (mx.symbol.Group(list.all)) -} - - - -#' Training LSTM Unrolled Model -#' -#' @param train.data mx.io.DataIter or list(data=R.array, label=R.array) -#' The Training set. -#' @param eval.data mx.io.DataIter or list(data=R.array, label=R.array), optional -#' The validation set used for validation evaluation during the progress. -#' @param num.lstm.layer integer -#' The number of the layer of lstm. -#' @param seq.len integer -#' The length of the input sequence. -#' @param num.hidden integer -#' The number of hidden nodes. -#' @param num.embed integer -#' The output dim of embedding. -#' @param num.label integer -#' The number of labels. -#' @param batch.size integer -#' The batch size used for R array training. -#' @param input.size integer -#' The input dim of one-hot encoding of embedding -#' @param ctx mx.context, optional -#' The device used to perform training. -#' @param num.round integer, default=10 -#' The number of iterations over training data to train the model. -#' @param update.period integer, default=1 -#' The number of iterations to update parameters during training period. -#' @param initializer initializer object. default=mx.init.uniform(0.01) -#' The initialization scheme for parameters. 
-#' @param dropout float, default=0 -#' A number in [0,1) containing the dropout ratio from the last hidden layer to the output layer. -#' @param optimizer string, default="sgd" -#' The optimization method. -#' @param epoch.end.callback function, optional -#' The callback when iteration ends. -#' @param batch.end.callback function, optional -#' The callback when one mini-batch iteration ends. -#' @param ... other parameters passing to \code{mx.lstm}/. -#' @return model A trained lstm unrolled model. -#' -#' @export -mx.lstm <- function(train.data, eval.data=NULL, - num.lstm.layer, seq.len, - num.hidden, num.embed, num.label, - batch.size, input.size, - ctx=mx.ctx.default(), - num.round=10, update.period=1, - initializer=mx.init.uniform(0.01), - dropout=0, optimizer='sgd', - epoch.end.callback=NULL, batch.end.callback=NULL, - model, - arg.params, - ...) { - # check data and change data into iterator - train.data <- check.data(train.data, batch.size, TRUE) - eval.data <- check.data(eval.data, batch.size, FALSE) - - - - # get unrolled lstm symbol - if(missing(model)){ - rnn.sym <- lstm.unroll(num.lstm.layer=num.lstm.layer, - num.hidden=num.hidden, - seq.len=seq.len, - input.size=input.size, - num.embed=num.embed, - num.label=num.label, - dropout=dropout) - } else { - rnn.sym=model$symbol - } - - init.states.c <- lapply(1:num.lstm.layer, function(i) { - state.c <- paste0("l", i, ".init.c") - return (state.c) - }) - init.states.h <- lapply(1:num.lstm.layer, function(i) { - state.h <- paste0("l", i, ".init.h") - return (state.h) - }) - init.states.name <- c(init.states.c, init.states.h) - - # set up lstm model - model <- setup.rnn.model(rnn.sym=rnn.sym, - ctx=ctx, - num.rnn.layer=num.lstm.layer, - seq.len=seq.len, - num.hidden=num.hidden, - num.embed=num.embed, - num.label=num.label, - batch.size=batch.size, - input.size=input.size, - init.states.name=init.states.name, - initializer=initializer, - dropout=dropout) - # restore states - if (!missing(arg.params)){ - 
arg.names <- names(model$rnn.exec$ref.arg.arrays) - for (k in names(arg.params)) { - if ((k %in% arg.names) && is.param.name(k) ) { - rnn.input <- list() - rnn.input[[k]] <- arg.params[[k]] - mx.exec.update.arg.arrays(model$rnn.exec, rnn.input, match.name=TRUE) - } - } - } - - # train lstm model - model <- train.rnn( model, train.data, eval.data, - num.round=num.round, - update.period=update.period, - ctx=ctx, - init.states.name=init.states.name, - epoch.end.callback=epoch.end.callback, - batch.end.callback=batch.end.callback, - ...) - # change model into MXFeedForwardModel - model <- list(symbol=model$symbol, arg.params=model$rnn.exec$ref.arg.arrays, aux.params=model$rnn.exec$ref.aux.arrays) - return(structure(model, class="MXFeedForwardModel")) -} - - -#' Create a LSTM Inference Model -#' -#' @param num.lstm.layer integer -#' The number of the layer of lstm. -#' @param input.size integer -#' The input dim of one-hot encoding of embedding -#' @param num.hidden integer -#' The number of hidden nodes. -#' @param num.embed integer -#' The output dim of embedding. -#' @param num.label integer -#' The number of labels. -#' @param batch.size integer, default=1 -#' The batch size used for R array training. -#' @param arg.params list -#' The batch size used for R array training. -#' @param ctx mx.context, optional -#' Model parameter, list of name to NDArray of net's weights. -#' @param dropout float, default=0 -#' A number in [0,1) containing the dropout ratio from the last hidden layer to the output layer. -#' @return model list(rnn.exec=integer, symbol=mxnet symbol, num.rnn.layer=integer, num.hidden=integer, seq.len=integer, batch.size=integer, num.embed=integer) -#' A lstm inference model. -#' -#' @export -mx.lstm.inference <- function(num.lstm.layer, - input.size, - num.hidden, - num.embed, - num.label, - batch.size=1, - arg.params, - ctx=mx.cpu(), - dropout=0.) 
{ - sym <- lstm.inference.symbol(num.lstm.layer=num.lstm.layer, - input.size=input.size, - num.hidden=num.hidden, - num.embed=num.embed, - num.label=num.label, - dropout=dropout) - - init.states.c <- lapply(1:num.lstm.layer, function(i) { - state.c <- paste0("l", i, ".init.c") - return (state.c) - }) - init.states.h <- lapply(1:num.lstm.layer, function(i) { - state.h <- paste0("l", i, ".init.h") - return (state.h) - }) - init.states.name <- c(init.states.c, init.states.h) - - seq.len <- 1 - # set up lstm model - model <- setup.rnn.model(rnn.sym=sym, - ctx=ctx, - num.rnn.layer=num.lstm.layer, - seq.len=seq.len, - num.hidden=num.hidden, - num.embed=num.embed, - num.label=num.label, - batch.size=batch.size, - input.size=input.size, - init.states.name=init.states.name, - initializer=mx.init.uniform(0.01), - dropout=dropout) - arg.names <- names(model$rnn.exec$ref.arg.arrays) - for (k in names(arg.params)) { - if ((k %in% arg.names) && is.param.name(k) ) { - rnn.input <- list() - rnn.input[[k]] <- arg.params[[k]] - mx.exec.update.arg.arrays(model$rnn.exec, rnn.input, match.name=TRUE) - } - } - init.states <- list() - for (i in 1:num.lstm.layer) { - init.states[[paste0("l", i, ".init.c")]] <- model$rnn.exec$ref.arg.arrays[[paste0("l", i, ".init.c")]]*0 - init.states[[paste0("l", i, ".init.h")]] <- model$rnn.exec$ref.arg.arrays[[paste0("l", i, ".init.h")]]*0 - } - mx.exec.update.arg.arrays(model$rnn.exec, init.states, match.name=TRUE) - - return (model) -} - -#' Using forward function to predict in lstm inference model -#' -#' @param model lstm model -#' A Lstm inference model -#' @param input.data, array.matrix -#' The input data for forward function -#' @param new.seq boolean, default=FALSE -#' Whether the input is the start of a new sequence -#' -#' @return result A list(prob=prob, model=model) containing the result probability of each label and the model. 
-#' -#' @export -mx.lstm.forward <- function(model, input.data, new.seq=FALSE) { - if (new.seq == TRUE) { - init.states <- list() - for (i in 1:model$num.rnn.layer) { - init.states[[paste0("l", i, ".init.c")]] <- model$rnn.exec$ref.arg.arrays[[paste0("l", i, ".init.c")]]*0 - init.states[[paste0("l", i, ".init.h")]] <- model$rnn.exec$ref.arg.arrays[[paste0("l", i, ".init.h")]]*0 - } - mx.exec.update.arg.arrays(model$rnn.exec, init.states, match.name=TRUE) - } - dim(input.data) <- c(model$batch.size) - data <- list(data=mx.nd.array(input.data)) - mx.exec.update.arg.arrays(model$rnn.exec, data, match.name=TRUE) - mx.exec.forward(model$rnn.exec, is.train=FALSE) - init.states <- list() - for (i in 1:model$num.rnn.layer) { - init.states[[paste0("l", i, ".init.c")]] <- model$rnn.exec$ref.outputs[[paste0("l", i, ".last.c_output")]] - init.states[[paste0("l", i, ".init.h")]] <- model$rnn.exec$ref.outputs[[paste0("l", i, ".last.h_output")]] - } - mx.exec.update.arg.arrays(model$rnn.exec, init.states, match.name=TRUE) - prob <- model$rnn.exec$ref.outputs[["sm_output"]] - return (list(prob=prob, model=model)) -} diff --git a/R-package/R/model.rnn.R b/R-package/R/model.rnn.R new file mode 100644 index 000000000000..8f3ab8c25874 --- /dev/null +++ b/R-package/R/model.rnn.R @@ -0,0 +1,339 @@ +# Internal function to do multiple device training on RNN +mx.model.train.buckets <- function(symbol, ctx, train.data, eval.data, + dlist, arg.params, aux.params, + grad.req, arg.update.idx, + begin.round, end.round, optimizer, metric, + epoch.end.callback, batch.end.callback, kvstore, verbose = TRUE) { + + ndevice <- length(ctx) + if (verbose) + message(paste0("Start training with ", ndevice, " devices")) + + input.names <- names(dlist) + arg.params.names <- names(arg.params) + + if (is.list(symbol)) sym_ini <- symbol[[names(train.data$bucketID)]] else sym_ini <- symbol + + slices <- lapply(1:ndevice, function(i) { + sapply(names(dlist), function(n) mx.nd.split(data=dlist[[n]], num_outputs = 
ndevice, axis = 0, squeeze_axis = F)) + }) + + train.execs <- lapply(1:ndevice, function(i) { + s <- slices[[i]] + mx.symbol.bind(symbol = sym_ini, arg.arrays = c(s, arg.params)[arg.update.idx], + aux.arrays = aux.params, ctx = ctx[[i]], grad.req = grad.req) + }) + + # KVStore related stuffs + params.index <- as.integer( + mx.util.filter.null( + lapply(1:length(train.execs[[1]]$ref.grad.arrays), function(k) { + if (!is.null(train.execs[[1]]$ref.grad.arrays[[k]])) k else NULL} + ))) + + update.on.kvstore <- FALSE + if (!is.null(kvstore) && kvstore$update.on.kvstore) { + update.on.kvstore <- TRUE + kvstore$set.optimizer(optimizer) + } else { + updaters <- lapply(1:ndevice, function(i) { + mx.opt.get.updater(optimizer, train.execs[[i]]$ref.arg.arrays) + }) + } + + if (!is.null(kvstore)) { + kvstore$init(params.index, train.execs[[1]]$ref.arg.arrays[params.index]) + } + + # train over specified number of epochs + for (iteration in begin.round:end.round) { + nbatch <- 0 + if (!is.null(metric)) { + train.metric <- metric$init() + } + train.data$reset() + while (train.data$iter.next()) { + + # Get iterator data + dlist <- train.data$value()[input.names] + + # Slice inputs for multi-devices + slices <- lapply(1:ndevice, function(i) { + sapply(names(dlist), function(n) mx.nd.split(data=dlist[[n]], num_outputs = ndevice, axis = 0, squeeze_axis = F)) + }) + + # Assign input to each executor - bug on inference if using BatchNorm + if (is.list(symbol)) { + train.execs <- lapply(1:ndevice, function(i) { + s <- slices[[i]] + mx.symbol.bind(symbol = symbol[[names(train.data$bucketID)]], + arg.arrays = c(s, train.execs[[i]]$arg.arrays[arg.params.names])[arg.update.idx], + aux.arrays = train.execs[[i]]$aux.arrays, ctx = ctx[[i]], grad.req = grad.req) + }) + } else { + for (i in 1:ndevice) { + s <- slices[[i]] + mx.exec.update.arg.arrays(train.execs[[i]], s, match.name=TRUE) + } + } + + for (texec in train.execs) { + mx.exec.forward(texec, is.train = TRUE) + } + + out.preds <- 
lapply(train.execs, function(texec) { + mx.nd.copyto(texec$ref.outputs[[1]], mx.cpu()) + }) + + for (texec in train.execs) { + mx.exec.backward(texec) + } + + if (!is.null(kvstore)) { + # push the gradient + kvstore$push(params.index, lapply(train.execs, function(texec) { + texec$ref.grad.arrays[params.index] + }), -params.index) + } + if (update.on.kvstore) { + # pull back weight + kvstore$pull(params.index, lapply(train.execs, function(texec) { + texec$ref.arg.arrays[params.index] + }), -params.index) + } else { + # pull back gradient sums + if (!is.null(kvstore)) { + kvstore$pull(params.index, lapply(train.execs, function(texec) { + texec$ref.grad.arrays[params.index] + }), -params.index) + } + arg.blocks <- lapply(1:ndevice, function(i) { + updaters[[i]](train.execs[[i]]$ref.arg.arrays, train.execs[[i]]$ref.grad.arrays) + }) + for (i in 1:ndevice) { + mx.exec.update.arg.arrays(train.execs[[i]], arg.blocks[[i]], skip.null = TRUE) + } + } + + # Update the evaluation metrics + if (!is.null(metric)) { + for (i in 1:ndevice) { + train.metric <- metric$update(label = slices[[i]][[length(slices[[i]])]], + pred = out.preds[[i]], state = train.metric) + } + } + + nbatch <- nbatch + 1 + + if (!is.null(batch.end.callback)) { + batch.end.callback(iteration, nbatch, environment()) + } + } + + if (!is.null(metric)) { + result <- metric$get(train.metric) + if (verbose) + message(paste0("[", iteration, "] Train-", result$name, "=", result$value)) + } + + if (!is.null(eval.data)) { + if (!is.null(metric)) { + eval.metric <- metric$init() + } + eval.data$reset() + while (eval.data$iter.next()) { + + # Get iterator data + dlist <- eval.data$value()[input.names] + + # Slice input to multiple devices + slices <- lapply(1:ndevice, function(i) { + sapply(names(dlist), function(n) mx.nd.split(data=dlist[[n]], num_outputs = ndevice, axis = 0, squeeze_axis = F)) + }) + + # Assign input to each executor - bug on inference if using BatchNorm + if (is.list(symbol)) { + train.execs <- 
lapply(1:ndevice, function(i) { + s <- slices[[i]] + mx.symbol.bind(symbol = symbol[[names(eval.data$bucketID)]], + arg.arrays = c(s, train.execs[[i]]$arg.arrays[arg.params.names])[arg.update.idx], + aux.arrays = train.execs[[i]]$aux.arrays, ctx = ctx[[i]], grad.req = grad.req) + }) + } else { + for (i in 1:ndevice) { + s <- slices[[i]] + mx.exec.update.arg.arrays(train.execs[[i]], s, match.name=TRUE) + } + } + + for (texec in train.execs) { + mx.exec.forward(texec, is.train = FALSE) + } + + # copy outputs to CPU + out.preds <- lapply(train.execs, function(texec) { + mx.nd.copyto(texec$ref.outputs[[1]], mx.cpu()) + }) + + if (!is.null(metric)) { + for (i in 1:ndevice) { + eval.metric <- metric$update(slices[[i]][[length(slices[[i]])]], + out.preds[[i]], eval.metric) + } + } + } + + if (!is.null(metric)) { + result <- metric$get(eval.metric) + if (verbose) { + message(paste0("[", iteration, "] Validation-", result$name, "=", + result$value)) + } + } + } else { + eval.metric <- NULL + } + # get the model out + model <- mx.model.extract.model(sym_ini, train.execs) + + epoch_continue <- TRUE + if (!is.null(epoch.end.callback)) { + epoch_continue <- epoch.end.callback(iteration, 0, environment(), verbose = verbose) + } + + if (!epoch_continue) { + break + } + } + return(model) +} + + +# +#' Train RNN with bucket support +#' +#' @param symbol Symbol or list of Symbols representing the model +#' @param train.data Training data created by mx.io.bucket.iter +#' @param eval.data Evaluation data created by mx.io.bucket.iter +#' @param num.round int, number of epoch +#' @param initializer +#' @param optimizer +#' @param batch.end.callback +#' @param epoch.end.callback +#' @param begin.round +#' @param metric +#' @param ctx +#' @param kvstore +#' @param verbose +#' +#' @export +mx.model.buckets <- function(symbol, train.data, eval.data = NULL, metric = NULL, + arg.params = NULL, aux.params = NULL, fixed.params = NULL, + num.round = 1, begin.round = 1, + initializer = 
mx.init.uniform(0.01), optimizer = "sgd", ctx = NULL, + batch.end.callback = NULL, epoch.end.callback = NULL, + kvstore = "local", verbose = TRUE) { + + if (!train.data$iter.next()) { + train.data$reset() + if (!train.data$iter.next()) + stop("Empty train.data") + } + + if (!is.null(eval.data)) { + if (!eval.data$iter.next()) { + eval.data$reset() + if (!eval.data$iter.next()) + stop("Empty eval.data") + } + } + + if (is.null(ctx)) + ctx <- mx.ctx.default() + if (is.mx.context(ctx)) { + ctx <- list(ctx) + } + if (!is.list(ctx)) + stop("ctx must be mx.context or list of mx.context") + if (is.character(optimizer)) { + if (is.numeric(input.shape)) { + ndim <- length(input.shape) + batchsize <- input.shape[[ndim]] + } else { + ndim <- length(input.shape[[1]]) + batchsize <- input.shape[[1]][[ndim]] + } + optimizer <- mx.opt.create(optimizer, rescale.grad = (1/batchsize), ...) + } + + if (is.list(symbol)) sym_ini <- symbol[[names(train.data$bucketID)]] else sym_ini <- symbol + + arguments <- sym_ini$arguments + input.names <- intersect(names(train.data$value()), arguments) + + input.shape <- sapply(input.names, function(n) { + dim(train.data$value()[[n]]) + }, simplify = FALSE) + + shapes <- sym_ini$infer.shape(input.shape) + + # assign arg.params and aux.params arguments to arg.params.input and aux.params.input + arg.params.input <- arg.params + aux.params.input <- aux.params + + # initialize all arguments with zeros + arg.params <- lapply(shapes$arg.shapes, function(shape) { + mx.nd.zeros(shape = shape, ctx = mx.cpu()) + }) + + # initialize input parameters + dlist <- arg.params[input.names] + + # initialize parameters - only argument ending with _weight and _bias are initialized + arg.params.ini <- mx.init.create(initializer = initializer, shape.array = shapes$arg.shapes, ctx = mx.cpu(), skip.unknown = TRUE) + + # assign initilized parameters to arg.params + arg.params[names(arg.params.ini)] <- arg.params.ini + + # assign input params to arg.params + 
arg.params[names(arg.params.input)] <- arg.params.input + + # remove input params from arg.params + arg.params[input.names] <- NULL + + # Grad request + grad.req <- rep("null", length(arguments)) + grad.req.write <- arguments %in% setdiff(names(arg.params.ini), fixed.params) + grad.req[grad.req.write] <- "write" + + # Arg array order + update_names <- c(input.names, names(arg.params)) + arg.update.idx <- match(arguments, update_names) + + # aux parameters setup + aux.params <- lapply(shapes$aux.shapes, function(shape) { + mx.nd.zeros(shape = shape, ctx = mx.cpu()) + }) + + aux.params.ini <- mx.init.create(initializer, shapes$aux.shapes, ctx = mx.cpu(), skip.unknown = FALSE) + if (length(aux.params) > 0) { + aux.params[names(aux.params.ini)] <- aux.params.ini + } else aux.params <- NULL + + aux.params[names(aux.params.input)] <- aux.params.input + + # kvstore initialization + kvstore <- mx.model.create.kvstore(kvstore, params$arg.params, length(ctx), + verbose = verbose) + + ### Execute training + model <- mx.model.train.buckets(symbol = symbol, ctx = ctx, train.data = train.data, eval.data = eval.data, + dlist = dlist, arg.params = arg.params, aux.params = aux.params, + grad.req = grad.req, arg.update.idx = arg.update.idx, + optimizer = optimizer, metric = metric, + begin.round = begin.round, end.round = num.round, + batch.end.callback = batch.end.callback, epoch.end.callback = epoch.end.callback, + kvstore = kvstore, verbose = verbose) + + return(model) +} diff --git a/R-package/R/mx.io.bucket.iter.R b/R-package/R/mx.io.bucket.iter.R new file mode 100644 index 000000000000..8e5ab59eaab8 --- /dev/null +++ b/R-package/R/mx.io.bucket.iter.R @@ -0,0 +1,110 @@ + +BucketIter <- setRefClass("BucketIter", fields = c("buckets", "bucket.names", "batch.size", + "data.mask.element", "shuffle", "bucket.plan", "bucketID", "epoch", "batch", "batch.per.bucket", + "last.batch.pad", "batch.per.epoch", "seed"), + methods = list( + initialize = function(buckets, + batch.size, 
data.mask.element = 0, shuffle = FALSE, seed = 123) { + .self$buckets <- buckets + .self$bucket.names <- names(.self$buckets) + .self$batch.size <- batch.size + .self$data.mask.element <- data.mask.element + .self$epoch <- 0 + .self$batch <- 0 + .self$shuffle <- shuffle + .self$batch.per.bucket <- 0 + .self$batch.per.epoch <- 0 + .self$bucket.plan <- NULL + .self$bucketID <- NULL + .self$seed <- seed + .self + }, reset = function() { + buckets_nb <- length(bucket.names) + buckets_id <- 1:buckets_nb + buckets.size <- sapply(.self$buckets, function(x) { + dim(x$data)[length(dim(x$data)) - 1] + }) + .self$batch.per.bucket <- ceiling(buckets.size/.self$batch.size) + .self$last.batch.pad <- .self$batch.size - buckets.size %% .self$batch.size + .self$last.batch.pad[.self$last.batch.pad == .self$batch.size] <- 0 + + .self$batch.per.epoch <- sum(.self$batch.per.bucket) + # Number of batches per epoch given the batch.size + .self$batch.per.epoch <- sum(.self$batch.per.bucket) + .self$epoch <- .self$epoch + 1 + .self$batch <- 0 + + if (.self$shuffle) { + set.seed(.self$seed) + bucket_plan_names <- sample(rep(names(.self$batch.per.bucket), times = .self$batch.per.bucket)) + .self$bucket.plan <- ave(bucket_plan_names == bucket_plan_names, bucket_plan_names, + FUN = cumsum) + names(.self$bucket.plan) <- bucket_plan_names + ### Return first BucketID at reset for initialization of the model + .self$bucketID <- .self$bucket.plan[1] + + .self$buckets <- lapply(.self$buckets, function(x) { + shuffle_id <- sample(dim(x$data)[length(dim(x$data)) - 1]) + if (length(dim(x$label)) == 0) { + list(data = x$data[shuffle_id, ], label = x$label[shuffle_id]) + } else { + list(data = x$data[shuffle_id, ], label = x$label[shuffle_id, ]) + } + }) + } else { + bucket_plan_names <- rep(names(.self$batch.per.bucket), times = .self$batch.per.bucket) + .self$bucket.plan <- ave(bucket_plan_names == bucket_plan_names, bucket_plan_names, + FUN = cumsum) + names(.self$bucket.plan) <- bucket_plan_names + } 
+ }, iter.next = function() { + .self$batch <- .self$batch + 1 + .self$bucketID <- .self$bucket.plan[batch] + if (.self$batch > .self$batch.per.epoch) { + return(FALSE) + } else { + return(TRUE) + } + }, value = function() { + # bucketID is a named integer: the integer indicates the batch id for the given + # bucket (used to fetch appropriate samples within the bucket) the name is the a + # character containing the sequence length of the bucket (used to unroll the rnn + # to appropriate sequence length) + idx <- (.self$bucketID - 1) * (.self$batch.size) + (1:batch.size) + + ### reuse first idx for padding + if (bucketID == .self$batch.per.bucket[names(.self$bucketID)] & !.self$last.batch.pad[names(.self$bucketID)] == 0) { + idx <- c(idx[1:(.self$batch.size - .self$last.batch.pad[names(.self$bucketID)])], 1:(.self$last.batch.pad[names(.self$bucketID)])) + } + + data <- .self$buckets[[names(.self$bucketID)]]$data[idx, , drop = F] + seq.mask <- as.integer(names(bucketID)) - apply(data==.self$data.mask.element, 1, sum) + if (length(dim(.self$buckets[[names(.self$bucketID)]]$label)) == 0) { + label <- .self$buckets[[names(.self$bucketID)]]$label[idx] + } else { + label <- .self$buckets[[names(.self$bucketID)]]$label[idx, , drop = F] + } + return(list(data = mx.nd.array(data), seq.mask = mx.nd.array(seq.mask), + label = mx.nd.array(label))) + }, num.pad = function() { + if (bucketID == .self$batch.per.bucket[names(.self$bucketID)] & !.self$last.batch.pad[names(.self$bucketID)] == 0){ + return(.self$last.batch.pad[names(.self$bucketID)]) + } else return(0) + }, finalize = function() { + })) + +# +#' Create Bucket Iter +#' +#' @param buckets The data array. +#' @param batch.size The batch size used to pack the array. 
+#' @param data.mask.element The element to mask +#' @param shuffle Whether shuffle the data +#' @param seed The random seed +#' +#' @export +mx.io.bucket.iter <- function(buckets, batch.size, data.mask.element = 0, shuffle = FALSE, + seed = 123) { + return(BucketIter$new(buckets = buckets, batch.size = batch.size, data.mask.element = data.mask.element, + shuffle = shuffle, seed = seed)) +} diff --git a/R-package/R/rnn.R b/R-package/R/rnn.R deleted file mode 100644 index b89559a58570..000000000000 --- a/R-package/R/rnn.R +++ /dev/null @@ -1,342 +0,0 @@ -# rnn cell symbol -rnn <- function(num.hidden, indata, prev.state, param, seqidx, - layeridx, dropout=0., batch.norm=FALSE) { - if (dropout > 0. ) - indata <- mx.symbol.Dropout(data=indata, p=dropout) - i2h <- mx.symbol.FullyConnected(data=indata, - weight=param$i2h.weight, - bias=param$i2h.bias, - num.hidden=num.hidden, - name=paste0("t", seqidx, ".l", layeridx, ".i2h")) - h2h <- mx.symbol.FullyConnected(data=prev.state$h, - weight=param$h2h.weight, - bias=param$h2h.bias, - num.hidden=num.hidden, - name=paste0("t", seqidx, ".l", layeridx, ".h2h")) - hidden <- i2h + h2h - - hidden <- mx.symbol.Activation(data=hidden, act.type="tanh") - if (batch.norm) - hidden <- mx.symbol.BatchNorm(data=hidden) - return (list(h=hidden)) -} - -# unrolled rnn network -rnn.unroll <- function(num.rnn.layer, seq.len, input.size, num.hidden, - num.embed, num.label, dropout=0., batch.norm=FALSE) { - embed.weight <- mx.symbol.Variable("embed.weight") - cls.weight <- mx.symbol.Variable("cls.weight") - cls.bias <- mx.symbol.Variable("cls.bias") - param.cells <- lapply(1:num.rnn.layer, function(i) { - cell <- list(i2h.weight = mx.symbol.Variable(paste0("l", i, ".i2h.weight")), - i2h.bias = mx.symbol.Variable(paste0("l", i, ".i2h.bias")), - h2h.weight = mx.symbol.Variable(paste0("l", i, ".h2h.weight")), - h2h.bias = mx.symbol.Variable(paste0("l", i, ".h2h.bias"))) - return (cell) - }) - last.states <- lapply(1:num.rnn.layer, function(i) { - 
state <- list(h=mx.symbol.Variable(paste0("l", i, ".init.h"))) - return (state) - }) - - # embeding layer - label <- mx.symbol.Variable("label") - data <- mx.symbol.Variable("data") - embed <- mx.symbol.Embedding(data=data, input_dim=input.size, - weight=embed.weight, output_dim=num.embed, name="embed") - wordvec <- mx.symbol.SliceChannel(data=embed, num_outputs=seq.len, squeeze_axis=1) - - last.hidden <- list() - for (seqidx in 1:seq.len) { - hidden <- wordvec[[seqidx]] - # stack RNN - for (i in 1:num.rnn.layer) { - dp <- ifelse(i==1, 0, dropout) - next.state <- rnn(num.hidden, indata=hidden, - prev.state=last.states[[i]], - param=param.cells[[i]], - seqidx=seqidx, layeridx=i, - dropout=dp, batch.norm=batch.norm) - hidden <- next.state$h - last.states[[i]] <- next.state - } - # decoder - if (dropout > 0.) - hidden <- mx.symbol.Dropout(data=hidden, p=dropout) - last.hidden <- c(last.hidden, hidden) - } - last.hidden$dim <- 0 - last.hidden$num.args <- seq.len - concat <-mxnet:::mx.varg.symbol.Concat(last.hidden) - fc <- mx.symbol.FullyConnected(data=concat, - weight=cls.weight, - bias=cls.bias, - num.hidden=num.label) - label <- mx.symbol.transpose(data=label) - label <- mx.symbol.Reshape(data=label, target.shape=c(0)) - - loss.all <- mx.symbol.SoftmaxOutput(data=fc, label=label, name="sm") - return (loss.all) -} - -# rnn inference model symbol -rnn.inference.symbol <- function(num.rnn.layer, seq.len, input.size, num.hidden, - num.embed, num.label, dropout=0., batch.norm=FALSE) { - seqidx <- 0 - embed.weight <- mx.symbol.Variable("embed.weight") - cls.weight <- mx.symbol.Variable("cls.weight") - cls.bias <- mx.symbol.Variable("cls.bias") - param.cells <- lapply(1:num.rnn.layer, function(i) { - cell <- list(i2h.weight = mx.symbol.Variable(paste0("l", i, ".i2h.weight")), - i2h.bias = mx.symbol.Variable(paste0("l", i, ".i2h.bias")), - h2h.weight = mx.symbol.Variable(paste0("l", i, ".h2h.weight")), - h2h.bias = mx.symbol.Variable(paste0("l", i, ".h2h.bias"))) - return 
(cell) - }) - last.states <- lapply(1:num.rnn.layer, function(i) { - state <- list(h=mx.symbol.Variable(paste0("l", i, ".init.h"))) - return (state) - }) - - # embeding layer - data <- mx.symbol.Variable("data") - hidden <- mx.symbol.Embedding(data=data, input_dim=input.size, - weight=embed.weight, output_dim=num.embed, name="embed") - # stack RNN - for (i in 1:num.rnn.layer) { - dp <- ifelse(i==1, 0, dropout) - next.state <- rnn(num.hidden, indata=hidden, - prev.state=last.states[[i]], - param=param.cells[[i]], - seqidx=seqidx, layeridx=i, - dropout=dp, batch.norm=batch.norm) - hidden <- next.state$h - last.states[[i]] <- next.state - } - # decoder - if (dropout > 0.) - hidden <- mx.symbol.Dropout(data=hidden, p=dropout) - - fc <- mx.symbol.FullyConnected(data=hidden, - weight=cls.weight, - bias=cls.bias, - num_hidden=num.label) - sm <- mx.symbol.SoftmaxOutput(data=fc, name='sm') - unpack.h <- lapply(1:num.rnn.layer, function(i) { - state <- last.states[[i]] - state.h <- mx.symbol.BlockGrad(state$h, name=paste0("l", i, ".last.h")) - return (state.h) - }) - list.all <- c(sm, unpack.h) - return (mx.symbol.Group(list.all)) -} - -#' Training RNN Unrolled Model -#' -#' @param train.data mx.io.DataIter or list(data=R.array, label=R.array) -#' The Training set. -#' @param eval.data mx.io.DataIter or list(data=R.array, label=R.array), optional -#' The validation set used for validation evaluation during the progress. -#' @param num.rnn.layer integer -#' The number of the layer of rnn. -#' @param seq.len integer -#' The length of the input sequence. -#' @param num.hidden integer -#' The number of hidden nodes. -#' @param num.embed integer -#' The output dim of embedding. -#' @param num.label integer -#' The number of labels. -#' @param batch.size integer -#' The batch size used for R array training. -#' @param input.size integer -#' The input dim of one-hot encoding of embedding -#' @param ctx mx.context, optional -#' The device used to perform training. 
-#' @param num.round integer, default=10 -#' The number of iterations over training data to train the model. -#' @param update.period integer, default=1 -#' The number of iterations to update parameters during training period. -#' @param initializer initializer object. default=mx.init.uniform(0.01) -#' The initialization scheme for parameters. -#' @param dropout float, default=0 -#' A number in [0,1) containing the dropout ratio from the last hidden layer to the output layer. -#' @param optimizer string, default="sgd" -#' The optimization method. -#' @param batch.norm boolean, default=FALSE -#' Whether to use batch normalization. -#' @param ... other parameters passing to \code{mx.rnn}/. -#' @return model A trained rnn unrolled model. -#' -#' @export -mx.rnn <- function( train.data, eval.data=NULL, - num.rnn.layer, seq.len, - num.hidden, num.embed, num.label, - batch.size, input.size, - ctx=mx.ctx.default(), - num.round=10, update.period=1, - initializer=mx.init.uniform(0.01), - dropout=0, optimizer='sgd', - batch.norm=FALSE, - ...) 
{ - # check data and change data into iterator - train.data <- check.data(train.data, batch.size, TRUE) - eval.data <- check.data(eval.data, batch.size, FALSE) - - # get unrolled rnn symbol - rnn.sym <- rnn.unroll( num.rnn.layer=num.rnn.layer, - num.hidden=num.hidden, - seq.len=seq.len, - input.size=input.size, - num.embed=num.embed, - num.label=num.label, - dropout=dropout, - batch.norm=batch.norm) - init.states.name <- lapply(1:num.rnn.layer, function(i) { - state <- paste0("l", i, ".init.h") - return (state) - }) - # set up rnn model - model <- setup.rnn.model(rnn.sym=rnn.sym, - ctx=ctx, - num.rnn.layer=num.rnn.layer, - seq.len=seq.len, - num.hidden=num.hidden, - num.embed=num.embed, - num.label=num.label, - batch.size=batch.size, - input.size=input.size, - init.states.name=init.states.name, - initializer=initializer, - dropout=dropout) - # train rnn model - model <- train.rnn( model, train.data, eval.data, - num.round=num.round, - update.period=update.period, - ctx=ctx, - init.states.name=init.states.name, - ...) - # change model into MXFeedForwardModel - model <- list(symbol=model$symbol, arg.params=model$rnn.exec$ref.arg.arrays, aux.params=model$rnn.exec$ref.aux.arrays) - return(structure(model, class="MXFeedForwardModel")) -} - -#' Create a RNN Inference Model -#' -#' @param num.rnn.layer integer -#' The number of the layer of rnn. -#' @param input.size integer -#' The input dim of one-hot encoding of embedding -#' @param num.hidden integer -#' The number of hidden nodes. -#' @param num.embed integer -#' The output dim of embedding. -#' @param num.label integer -#' The number of labels. -#' @param batch.size integer, default=1 -#' The batch size used for R array training. -#' @param arg.params list -#' The batch size used for R array training. -#' @param ctx mx.context, optional -#' Model parameter, list of name to NDArray of net's weights. 
-#' @param dropout float, default=0 -#' A number in [0,1) containing the dropout ratio from the last hidden layer to the output layer. -#' @param batch.norm boolean, default=FALSE -#' Whether to use batch normalization. -#' @return model list(rnn.exec=integer, symbol=mxnet symbol, num.rnn.layer=integer, num.hidden=integer, seq.len=integer, batch.size=integer, num.embed=integer) -#' A rnn inference model. -#' -#' @export -mx.rnn.inference <- function( num.rnn.layer, - input.size, - num.hidden, - num.embed, - num.label, - batch.size=1, - arg.params, - ctx=mx.cpu(), - dropout=0., - batch.norm=FALSE) { - sym <- rnn.inference.symbol( num.rnn.layer=num.rnn.layer, - input.size=input.size, - num.hidden=num.hidden, - num.embed=num.embed, - num.label=num.label, - dropout=dropout, - batch.norm=batch.norm) - # init.states.name <- c() - # for (i in 1:num.rnn.layer) { - # init.states.name <- c(init.states.name, paste0("l", i, ".init.c")) - # init.states.name <- c(init.states.name, paste0("l", i, ".init.h")) - # } - init.states.name <- lapply(1:num.rnn.layer, function(i) { - state <- paste0("l", i, ".init.h") - return (state) - }) - - seq.len <- 1 - # set up rnn model - model <- setup.rnn.model(rnn.sym=sym, - ctx=ctx, - num.rnn.layer=num.rnn.layer, - seq.len=seq.len, - num.hidden=num.hidden, - num.embed=num.embed, - num.label=num.label, - batch.size=batch.size, - input.size=input.size, - init.states.name=init.states.name, - initializer=mx.init.uniform(0.01), - dropout=dropout) - arg.names <- names(model$rnn.exec$ref.arg.arrays) - for (k in names(arg.params)) { - if ((k %in% arg.names) && is.param.name(k) ) { - rnn.input <- list() - rnn.input[[k]] <- arg.params[[k]] - mx.exec.update.arg.arrays(model$rnn.exec, rnn.input, match.name=TRUE) - } - } - init.states <- list() - for (i in 1:num.rnn.layer) { - init.states[[paste0("l", i, ".init.h")]] <- model$rnn.exec$ref.arg.arrays[[paste0("l", i, ".init.h")]]*0 - } - mx.exec.update.arg.arrays(model$rnn.exec, init.states, match.name=TRUE) 
- - return (model) -} - -#' Using forward function to predict in rnn inference model -#' -#' @param model rnn model -#' A rnn inference model -#' @param input.data, array.matrix -#' The input data for forward function -#' @param new.seq boolean, default=FALSE -#' Whether the input is the start of a new sequence -#' -#' @return result A list(prob=prob, model=model) containing the result probability of each label and the model. -#' -#' @export -mx.rnn.forward <- function(model, input.data, new.seq=FALSE) { - if (new.seq == TRUE) { - init.states <- list() - for (i in 1:model$num.rnn.layer) { - init.states[[paste0("l", i, ".init.h")]] <- model$rnn.exec$ref.arg.arrays[[paste0("l", i, ".init.h")]]*0 - } - mx.exec.update.arg.arrays(model$rnn.exec, init.states, match.name=TRUE) - } - dim(input.data) <- c(model$batch.size) - data <- list(data=mx.nd.array(input.data)) - mx.exec.update.arg.arrays(model$rnn.exec, data, match.name=TRUE) - mx.exec.forward(model$rnn.exec, is.train=FALSE) - init.states <- list() - for (i in 1:model$num.rnn.layer) { - init.states[[paste0("l", i, ".init.h")]] <- model$rnn.exec$ref.outputs[[paste0("l", i, ".last.h_output")]] - } - mx.exec.update.arg.arrays(model$rnn.exec, init.states, match.name=TRUE) - #print (model$rnn.exec$ref) - prob <- model$rnn.exec$ref.outputs[["sm_output"]] - print ("prob") - print (prob) - return (list(prob=prob, model=model)) -} diff --git a/R-package/R/rnn.graph.R b/R-package/R/rnn.graph.R new file mode 100644 index 000000000000..2c099f08028a --- /dev/null +++ b/R-package/R/rnn.graph.R @@ -0,0 +1,329 @@ +# +#' Generate a RNN symbolic model - requires CUDA +#' +#' @param config Either seq-to-one or one-to-one +#' @param cell.type Type of RNN cell: either gru or lstm +#' @param num.rnn.layer int, number of stacked layers +#' @param num.hidden int, size of the state in each RNN layer +#' @param num.embed int, default = NULL - no embedding. 
Dimension of the embedding vectors +#' @param num.decode int, number of output variables in the decoding layer +#' @param input.size int, number of levels in the data - only used for embedding +#' @param dropout +#' +#' @export +rnn.graph <- function(num.rnn.layer, + input.size = NULL, + num.embed = NULL, + num.hidden, + num.decode, + dropout = 0, + ignore_label = -1, + loss_output = NULL, + config, + cell.type, + masking = F, + output_last_state = F) { + + # define input arguments + data <- mx.symbol.Variable("data") + label <- mx.symbol.Variable("label") + seq.mask <- mx.symbol.Variable("seq.mask") + + if (!is.null(num.embed)) embed.weight <- mx.symbol.Variable("embed.weight") + + rnn.params.weight <- mx.symbol.Variable("rnn.params.weight") + rnn.state <- mx.symbol.Variable("rnn.state") + + if (cell.type == "lstm") { + rnn.state.cell <- mx.symbol.Variable("rnn.state.cell") + } + + cls.weight <- mx.symbol.Variable("cls.weight") + cls.bias <- mx.symbol.Variable("cls.bias") + + if (!is.null(num.embed)){ + data <- mx.symbol.Embedding(data=data, input_dim=input.size, + weight=embed.weight, output_dim=num.embed, name="embed") + } + + # RNN cells + if (cell.type == "lstm") { + rnn <- mx.symbol.RNN(data=data, state=rnn.state, state_cell = rnn.state.cell, parameters=rnn.params.weight, state.size=num.hidden, num.layers=num.rnn.layer, bidirectional=F, mode=cell.type, state.outputs=output_last_state, p=dropout, name=paste(cell.type, num.rnn.layer, "layer", sep="_")) + + } else { + rnn <- mx.symbol.RNN(data=data, state=rnn.state, parameters=rnn.params.weight, state.size=num.hidden, num.layers=num.rnn.layer, bidirectional=F, mode=cell.type, state.outputs=output_last_state, p=dropout, name=paste(cell.type, num.rnn.layer, "layer", sep="_")) + } + + # Decode + if (config=="seq-to-one") { + + if (masking) mask <- mx.symbol.SequenceLast(data=rnn[[1]], use.sequence.length = T, sequence_length = seq.mask, name = "mask") else + mask <- mx.symbol.SequenceLast(data=rnn[[1]], 
 use.sequence.length = F, name = "mask") + + decode <- mx.symbol.FullyConnected(data=mask, + weight=cls.weight, + bias=cls.bias, + num.hidden=num.decode, + name = "decode") + + if (!is.null(loss_output)) { + loss <- switch(loss_output, + softmax = mx.symbol.SoftmaxOutput(data=decode, label=label, use_ignore = !ignore_label == -1, ignore_label = ignore_label, name = "loss"), + linear = mx.symbol.LinearRegressionOutput(data=decode, label=label, name = "loss"), + logictic = mx.symbol.LogisticRegressionOutput(data=decode, label=label, name = "loss"), + MAE = mx.symbol.MAERegressionOutput(data=decode, label=label, name = "loss") + ) + } else loss <- decode + + } else if (config=="one-to-one"){ + + if (masking) mask <- mx.symbol.SequenceMask(data = rnn[[1]], use.sequence.length = T, sequence_length = seq.mask, value = 0, name = "mask") else + mask <- mx.symbol.identity(data = rnn[[1]], name = "mask") + + mask = mx.symbol.reshape(mask, shape=c(num.hidden, -1)) + + decode <- mx.symbol.FullyConnected(data=mask, + weight=cls.weight, + bias=cls.bias, + num.hidden=num.decode, + name = "decode") + + label <- mx.symbol.reshape(data=label, shape=c(-1), name = "label_reshape") + + if (!is.null(loss_output)) { + loss <- switch(loss_output, + softmax = mx.symbol.SoftmaxOutput(data=decode, label=label, use_ignore = !ignore_label == -1, ignore_label = ignore_label, name = "loss"), + linear = mx.symbol.LinearRegressionOutput(data=decode, label=label, name = "loss"), + logictic = mx.symbol.LogisticRegressionOutput(data=decode, label=label, name = "loss"), + MAE = mx.symbol.MAERegressionOutput(data=decode, label=label, name = "loss") + ) + } else loss <- decode + } + return(loss) +} + + +# LSTM cell symbol +lstm.cell <- function(num.hidden, indata, prev.state, param, seqidx, layeridx, dropout = 0) { + i2h <- mx.symbol.FullyConnected(data = indata, weight = param$i2h.weight, bias = param$i2h.bias, + num.hidden = num.hidden * 4, name = paste0("t", seqidx, ".l", layeridx, ".i2h")) + + if
(dropout > 0) + i2h <- mx.symbol.Dropout(data = i2h, p = dropout) + + if (!is.null(prev.state)) { + h2h <- mx.symbol.FullyConnected(data = prev.state$h, weight = param$h2h.weight, + bias = param$h2h.bias, num.hidden = num.hidden * 4, + name = paste0("t", seqidx, ".l", layeridx, ".h2h")) + gates <- i2h + h2h + } else { + gates <- i2h + } + + split.gates <- mx.symbol.split(gates, num.outputs = 4, axis = 1, squeeze.axis = F, + name = paste0("t", seqidx, ".l", layeridx, ".slice")) + + in.gate <- mx.symbol.Activation(split.gates[[1]], act.type = "sigmoid") + in.transform <- mx.symbol.Activation(split.gates[[2]], act.type = "tanh") + forget.gate <- mx.symbol.Activation(split.gates[[3]], act.type = "sigmoid") + out.gate <- mx.symbol.Activation(split.gates[[4]], act.type = "sigmoid") + + if (is.null(prev.state)) { + next.c <- in.gate * in.transform + } else { + next.c <- (forget.gate * prev.state$c) + (in.gate * in.transform) + } + + next.h <- out.gate * mx.symbol.Activation(next.c, act.type = "tanh") + + return(list(c = next.c, h = next.h)) +} + +# GRU cell symbol +gru.cell <- function(num.hidden, indata, prev.state, param, seqidx, layeridx, dropout = 0) { + i2h <- mx.symbol.FullyConnected(data = indata, weight = param$gates.i2h.weight, + bias = param$gates.i2h.bias, num.hidden = num.hidden * 2, + name = paste0("t", seqidx, ".l", layeridx, ".gates.i2h")) + + if (dropout > 0) + i2h <- mx.symbol.Dropout(data = i2h, p = dropout) + + if (!is.null(prev.state)) { + h2h <- mx.symbol.FullyConnected(data = prev.state$h, weight = param$gates.h2h.weight, + bias = param$gates.h2h.bias, num.hidden = num.hidden * 2, + name = paste0("t", seqidx, ".l", layeridx, ".gates.h2h")) + gates <- i2h + h2h + } else { + gates <- i2h + } + + split.gates <- mx.symbol.split(gates, num.outputs = 2, axis = 1, squeeze.axis = F, + name = paste0("t", seqidx, ".l", layeridx, ".split")) + + update.gate <- mx.symbol.Activation(split.gates[[1]], act.type = "sigmoid") + reset.gate <- 
mx.symbol.Activation(split.gates[[2]], act.type = "sigmoid") + + htrans.i2h <- mx.symbol.FullyConnected(data = indata, weight = param$trans.i2h.weight, + bias = param$trans.i2h.bias, num.hidden = num.hidden, + name = paste0("t", seqidx, ".l", layeridx, ".trans.i2h")) + + if (is.null(prev.state)) { + h.after.reset <- reset.gate * 0 + } else { + h.after.reset <- prev.state$h * reset.gate + } + + htrans.h2h <- mx.symbol.FullyConnected(data = h.after.reset, weight = param$trans.h2h.weight, + bias = param$trans.h2h.bias, num.hidden = num.hidden, + name = paste0("t", seqidx, ".l", layeridx, ".trans.h2h")) + + h.trans <- htrans.i2h + htrans.h2h + h.trans.active <- mx.symbol.Activation(h.trans, act.type = "tanh") + + if (is.null(prev.state)) { + next.h <- update.gate * h.trans.active + } else { + next.h <- prev.state$h + update.gate * (h.trans.active - prev.state$h) + } + + return(list(h = next.h)) +} + +# +#' unroll representation of RNN running on non CUDA device - under development +#' +#' @export +rnn.graph.unroll <- function(num.rnn.layer, + seq.len, + input.size = NULL, + num.embed = NULL, + num.hidden, + num.decode, + dropout = 0, + ignore_label = -1, + loss_output = NULL, + init.state = NULL, + config, + cell.type = "lstm", + masking = F, + output_last_state = F) { + + + if (!is.null(num.embed)) embed.weight <- mx.symbol.Variable("embed.weight") + + cls.weight <- mx.symbol.Variable("cls.weight") + cls.bias <- mx.symbol.Variable("cls.bias") + + param.cells <- lapply(1:num.rnn.layer, function(i) { + + if (cell.type=="lstm"){ + cell <- list(i2h.weight = mx.symbol.Variable(paste0("l", i, ".i2h.weight")), + i2h.bias = mx.symbol.Variable(paste0("l", i, ".i2h.bias")), + h2h.weight = mx.symbol.Variable(paste0("l", i, ".h2h.weight")), + h2h.bias = mx.symbol.Variable(paste0("l", i, ".h2h.bias"))) + } else if (cell.type=="gru"){ + cell <- list(gates.i2h.weight = mx.symbol.Variable(paste0("l", i, ".gates.i2h.weight")), + gates.i2h.bias = mx.symbol.Variable(paste0("l", i, 
".gates.i2h.bias")), + gates.h2h.weight = mx.symbol.Variable(paste0("l", i, ".gates.h2h.weight")), + gates.h2h.bias = mx.symbol.Variable(paste0("l", i, ".gates.h2h.bias")), + trans.i2h.weight = mx.symbol.Variable(paste0("l", i, ".trans.i2h.weight")), + trans.i2h.bias = mx.symbol.Variable(paste0("l", i, ".trans.i2h.bias")), + trans.h2h.weight = mx.symbol.Variable(paste0("l", i, ".trans.h2h.weight")), + trans.h2h.bias = mx.symbol.Variable(paste0("l", i, ".trans.h2h.bias"))) + } + return (cell) + }) + + # embeding layer + data <- mx.symbol.Variable("data") + label <- mx.symbol.Variable("label") + seq.mask <- mx.symbol.Variable("seq.mask") + + if (!is.null(num.embed)) { + data <- mx.symbol.Embedding(data = data, input_dim = input.size, + weight=embed.weight, output_dim = num.embed, name = "embed") + } + + data <- mx.symbol.split(data = data, axis = 0, num.outputs = seq.len, squeeze_axis = T) + + last.hidden <- list() + last.states <- list() + + for (seqidx in 1:seq.len) { + hidden <- data[[seqidx]] + + for (i in 1:num.rnn.layer) { + + if (seqidx==1) prev.state<- init.state[[i]] else prev.state <- last.states[[i]] + + if (cell.type=="lstm") { + cell.symbol <- lstm.cell + } else if (cell.type=="gru"){ + cell.symbol <- gru.cell + } + + next.state <- cell.symbol(num.hidden = num.hidden, + indata = hidden, + prev.state = prev.state, + param = param.cells[[i]], + seqidx = seqidx, + layeridx = i, + dropout = dropout) + hidden <- next.state$h + last.states[[i]] <- next.state + } + + # Aggregate outputs from each timestep + last.hidden <- c(last.hidden, hidden) + } + + # concat hidden units - concat seq.len blocks of dimension num.hidden x batch.size + concat <- mx.symbol.concat(data = last.hidden, num.args = seq.len, dim = 0, name = "concat") + concat <- mx.symbol.reshape(data = concat, shape = c(num.hidden, -1, seq.len), name = "rnn_reshape") + + if (config=="seq-to-one"){ + + if (masking) mask <- mx.symbol.SequenceLast(data=concat, use.sequence.length = T, sequence_length = 
seq.mask, name = "mask") else + mask <- mx.symbol.SequenceLast(data=concat, use.sequence.length = F, name = "mask") + + decode <- mx.symbol.FullyConnected(data = mask, + weight = cls.weight, + bias = cls.bias, + num.hidden = num.decode, + name = "decode") + + if (!is.null(loss_output)) { + loss <- switch(loss_output, + softmax = mx.symbol.SoftmaxOutput(data=decode, label=label, use_ignore = !ignore_label == -1, ignore_label = ignore_label, name = "loss"), + linear = mx.symbol.LinearRegressionOutput(data=decode, label=label, name = "loss"), + logictic = mx.symbol.LogisticRegressionOutput(data=decode, label=label, name = "loss"), + MAE = mx.symbol.MAERegressionOutput(data=decode, label=label, name = "loss") + ) + } else loss <- decode + + } else if (config=="one-to-one"){ + + if (masking) mask <- mx.symbol.SequenceMask(data = concat, use.sequence.length = T, sequence_length = seq.mask, value = 0, name = "mask") else + mask <- mx.symbol.identity(data = concat, name = "mask") + + mask = mx.symbol.reshape(mask, shape=c(num.hidden, -1)) + + decode <- mx.symbol.FullyConnected(data = mask, + weight = cls.weight, + bias = cls.bias, + num.hidden = num.decode, + name = "decode") + + label <- mx.symbol.reshape(data = label, shape = -1, name = "label_reshape") + + if (!is.null(loss_output)) { + loss <- switch(loss_output, + softmax = mx.symbol.SoftmaxOutput(data=decode, label=label, use_ignore = !ignore_label == -1, ignore_label = ignore_label, name = "loss"), + linear = mx.symbol.LinearRegressionOutput(data=decode, label=label, name = "loss"), + logictic = mx.symbol.LogisticRegressionOutput(data=decode, label=label, name = "loss"), + MAE = mx.symbol.MAERegressionOutput(data=decode, label=label, name = "loss") + ) + } else loss <- decode + } + return(loss) +} diff --git a/R-package/R/rnn.infer.R b/R-package/R/rnn.infer.R new file mode 100644 index 000000000000..c9ccecbddbeb --- /dev/null +++ b/R-package/R/rnn.infer.R @@ -0,0 +1,177 @@ +# +#' Inference of RNN model +#' +#' 
 @param infer.data Data iterator created by mx.io.bucket.iter +#' @param model Model used for inference +#' @param ctx Context on which to run inference, e.g. mx.cpu() +#' +#' @export +mx.infer.buckets <- function(infer.data, model, ctx = mx.cpu()) { + + ### Initialise the iterator + infer.data$reset() + infer.data$iter.next() + + if (is.null(ctx)) + ctx <- mx.ctx.default() + if (is.mx.context(ctx)) { + ctx <- list(ctx) + } + if (!is.list(ctx)) + stop("ctx must be mx.context or list of mx.context") + + ndevice <- length(ctx) + symbol <- model$symbol + if (is.list(symbol)) sym_ini <- symbol[[names(infer.data$bucketID)]] else sym_ini <- symbol + + arguments <- sym_ini$arguments + input.names <- intersect(names(infer.data$value()), arguments) + + input.shape <- sapply(input.names, function(n) { + dim(infer.data$value()[[n]]) + }, simplify = FALSE) + + shapes <- sym_ini$infer.shape(input.shape) + + # initialize all arguments with zeros + arguments.ini <- lapply(shapes$arg.shapes, function(shape) { + mx.nd.zeros(shape = shape, ctx = mx.cpu()) + }) + + arg.params <- model$arg.params + arg.params.names <- names(arg.params) + aux.params <- model$aux.params + + # Initial binding + dlist <- arguments.ini[input.names] + + # Assign fixed parameters to their value and keep non initialized arguments to zero + arg.params.fix.names <- setdiff(arguments, c(arg.params.names, input.names)) + + # Assign zeros to non initialized arg parameters + arg.params.fix <- arguments.ini[arg.params.fix.names] + + # Grad request + grad.req <- rep("null", length(arguments)) + + # Arg array order + update_names <- c(input.names, arg.params.fix.names, arg.params.names) + arg_update_idx <- match(arguments, update_names) + + execs <- mx.symbol.bind(symbol = symbol, arg.arrays = c(dlist, arg.params.fix, arg.params)[arg_update_idx], + aux.arrays = aux.params, ctx = ctx[[1]], grad.req = grad.req) + + # Initial input shapes - need to be adapted for multi-devices - divide highest + # dimension by device nb + + packer <-
mx.nd.arraypacker() + infer.data$reset() + while (infer.data$iter.next()) { + + # Get input data slice + dlist <- infer.data$value() #[input.names] + + execs <- mx.symbol.bind(symbol = symbol, arg.arrays = c(dlist, execs$arg.arrays[arg.params.fix.names], execs$arg.arrays[arg.params.names])[arg_update_idx], + aux.arrays = execs$aux.arrays, ctx = ctx[[1]], grad.req = grad.req) + + mx.exec.forward(execs, is.train = FALSE) + + out.pred <- mx.nd.copyto(execs$ref.outputs[[1]], mx.cpu()) + padded <- infer.data$num.pad() + oshape <- dim(out.pred) + ndim <- length(oshape) + packer$push(mx.nd.slice.axis(data = out.pred, axis = 0, begin = 0, end = oshape[[ndim]] - padded)) + + } + infer.data$reset() + return(packer$get()) +} + + + +### inference for one-to-one models +mx.infer.buckets.one <- function(infer.data, + symbol, arg.params, aux.params, input.params = NULL, + ctx = mx.cpu()) { + + ### Initialise the iterator + infer.data$reset() + infer.data$iter.next() + + if (is.null(ctx)) + ctx <- mx.ctx.default() + if (is.mx.context(ctx)) { + ctx <- list(ctx) + } + if (!is.list(ctx)) + stop("ctx must be mx.context or list of mx.context") + + ndevice <- length(ctx) + + arguments <- symbol$arguments + input.names <- intersect(names(infer.data$value()), arguments) + + input.shape <- sapply(input.names, function(n) { + dim(infer.data$value()[[n]]) + }, simplify = FALSE) + + shapes <- symbol$infer.shape(input.shape) + + # initialize all arguments with zeros + arguments.ini <- lapply(shapes$arg.shapes, function(shape) { + mx.nd.zeros(shape = shape, ctx = mx.cpu()) + }) + + arg.params <- arg.params + arg.params.names <- names(arg.params) + + dlist <- arguments.ini[input.names] + + # Assign fixed parameters to their value and keep non initialized arguments to zero + arg.params.fix.names <- unique(c(names(input.params), setdiff(arguments, c(arg.params.names, input.names)))) + + # Assign zeros to non initialized arg parameters + arg.params.fix <- arguments.ini[arg.params.fix.names] + # 
Assign weights to arguments specifies by input.params + arg.params.fix[names(input.params)] <- input.params + + aux.params <- aux.params + + # Grad request + grad.req <- rep("null", length(arguments)) + + # Arg array order + update_names <- c(input.names, arg.params.fix.names, arg.params.names) + arg_update_idx <- match(arguments, update_names) + + # Initial binding + execs <- mx.symbol.bind(symbol = symbol, + arg.arrays = c(dlist, arg.params.fix, arg.params)[arg_update_idx], + aux.arrays = aux.params, ctx = ctx[[1]], grad.req = grad.req) + + # Initial input shapes - need to be adapted for multi-devices - divide highest + # dimension by device nb + + infer.data$reset() + while (infer.data$iter.next()) { + + # Get input data slice + dlist <- infer.data$value()[input.names] + + execs <- mx.symbol.bind(symbol = symbol, + arg.arrays = c(dlist, execs$arg.arrays[arg.params.fix.names], execs$arg.arrays[arg.params.names])[arg_update_idx], + aux.arrays = execs$aux.arrays, ctx = ctx[[1]], grad.req = grad.req) + + mx.exec.forward(execs, is.train = FALSE) + + out.pred <- mx.nd.copyto(execs$ref.outputs[[1]], mx.cpu()) + state <- mx.nd.copyto(execs$ref.outputs[[2]], mx.cpu()) + state_cell <- mx.nd.copyto(execs$ref.outputs[[3]], mx.cpu()) + + out <- lapply(execs$ref.outputs, function(out) { + mx.nd.copyto(out, mx.cpu()) + }) + } + infer.data$reset() + return(out) +} diff --git a/R-package/R/rnn_model.R b/R-package/R/rnn_model.R deleted file mode 100644 index aa4a7d03ca9b..000000000000 --- a/R-package/R/rnn_model.R +++ /dev/null @@ -1,258 +0,0 @@ -is.param.name <- function(name) { - return (grepl('weight$', name) || grepl('bias$', name) || - grepl('gamma$', name) || grepl('beta$', name) ) -} - -# Initialize the data iter -mx.model.init.iter.rnn <- function(X, y, batch.size, is.train) { - if (is.mx.dataiter(X)) return(X) - shape <- dim(X) - if (is.null(shape)) { - num.data <- length(X) - } else { - ndim <- length(shape) - num.data <- shape[[ndim]] - } - if (is.null(y)) { - if 
(is.train) stop("Need to provide parameter y for training with R arrays.") - y <- c(1:num.data) * 0 - } - - batch.size <- min(num.data, batch.size) - - return(mx.io.arrayiter(X, y, batch.size=batch.size, shuffle=is.train)) -} - -# set up rnn model with rnn cells -setup.rnn.model <- function(rnn.sym, ctx, - num.rnn.layer, seq.len, - num.hidden, num.embed, num.label, - batch.size, input.size, - init.states.name, - initializer=mx.init.uniform(0.01), - dropout=0) { - - arg.names <- rnn.sym$arguments - input.shapes <- list() - for (name in arg.names) { - if (name %in% init.states.name) { - input.shapes[[name]] <- c(num.hidden, batch.size) - } - else if (grepl('data$', name) || grepl('label$', name) ) { - if (seq.len == 1) { - input.shapes[[name]] <- c(batch.size) - } else { - input.shapes[[name]] <- c(seq.len, batch.size) - } - } - } - params <- mx.model.init.params(rnn.sym, input.shapes, NULL, initializer, mx.cpu()) - args <- input.shapes - args$symbol <- rnn.sym - args$ctx <- ctx - args$grad.req <- "write" - rnn.exec <- do.call(mx.simple.bind, args) - - mx.exec.update.arg.arrays(rnn.exec, params$arg.params, match.name=TRUE) - mx.exec.update.aux.arrays(rnn.exec, params$aux.params, match.name=TRUE) - - grad.arrays <- list() - for (name in names(rnn.exec$ref.grad.arrays)) { - if (is.param.name(name)) - grad.arrays[[name]] <- rnn.exec$ref.arg.arrays[[name]]*0 - } - mx.exec.update.grad.arrays(rnn.exec, grad.arrays, match.name=TRUE) - - return (list(rnn.exec=rnn.exec, symbol=rnn.sym, - num.rnn.layer=num.rnn.layer, num.hidden=num.hidden, - seq.len=seq.len, batch.size=batch.size, - num.embed=num.embed)) - -} - - -calc.nll <- function(seq.label.probs, batch.size) { - nll = - sum(log(seq.label.probs)) / batch.size - return (nll) -} - -get.label <- function(label, ctx) { - label <- as.array(label) - seq.len <- dim(label)[[1]] - batch.size <- dim(label)[[2]] - sm.label <- array(0, dim=c(seq.len*batch.size)) - for (seqidx in 1:seq.len) { - sm.label[((seqidx-1)*batch.size+1) : 
(seqidx*batch.size)] <- label[seqidx,] - } - return (mx.nd.array(sm.label, ctx)) -} - - -# training rnn model -train.rnn <- function (model, train.data, eval.data, - num.round, update.period, - init.states.name, - optimizer='sgd', ctx=mx.ctx.default(), - epoch.end.callback, - batch.end.callback, - verbose=TRUE, - ...) { - m <- model - - model <- list(symbol=model$symbol, arg.params=model$rnn.exec$ref.arg.arrays, - aux.params=model$rnn.exec$ref.aux.arrays) - - seq.len <- m$seq.len - batch.size <- m$batch.size - num.rnn.layer <- m$num.rnn.layer - num.hidden <- m$num.hidden - - opt <- mx.opt.create(optimizer, rescale.grad=(1/batch.size), ...) - - updater <- mx.opt.get.updater(opt, m$rnn.exec$ref.arg.arrays) - epoch.counter <- 0 - log.period <- max(as.integer(1000 / seq.len), 1) - last.perp <- 10000000.0 - - for (iteration in 1:num.round) { - nbatch <- 0 - train.nll <- 0 - # reset states - init.states <- list() - for (name in init.states.name) { - init.states[[name]] <- m$rnn.exec$ref.arg.arrays[[name]]*0 - } - - mx.exec.update.arg.arrays(m$rnn.exec, init.states, match.name=TRUE) - - tic <- Sys.time() - - train.data$reset() - - while (train.data$iter.next()) { - # set rnn input - rnn.input <- train.data$value() - mx.exec.update.arg.arrays(m$rnn.exec, rnn.input, match.name=TRUE) - - mx.exec.forward(m$rnn.exec, is.train=TRUE) - seq.label.probs <- mx.nd.choose.element.0index(m$rnn.exec$ref.outputs[["sm_output"]], get.label(m$rnn.exec$ref.arg.arrays[["label"]], ctx)) - - mx.exec.backward(m$rnn.exec) - init.states <- list() - for (name in init.states.name) { - init.states[[name]] <- m$rnn.exec$ref.arg.arrays[[name]]*0 - } - - mx.exec.update.arg.arrays(m$rnn.exec, init.states, match.name=TRUE) - # update epoch counter - epoch.counter <- epoch.counter + 1 - if (epoch.counter %% update.period == 0) { - # the gradient of initial c and inital h should be zero - init.grad <- list() - for (name in init.states.name) { - init.grad[[name]] <- m$rnn.exec$ref.arg.arrays[[name]]*0 - } - 
- mx.exec.update.grad.arrays(m$rnn.exec, init.grad, match.name=TRUE) - - arg.blocks <- updater(m$rnn.exec$ref.arg.arrays, m$rnn.exec$ref.grad.arrays) - - mx.exec.update.arg.arrays(m$rnn.exec, arg.blocks, skip.null=TRUE) - - grad.arrays <- list() - for (name in names(m$rnn.exec$ref.grad.arrays)) { - if (is.param.name(name)) - grad.arrays[[name]] <- m$rnn.exec$ref.grad.arrays[[name]]*0 - } - mx.exec.update.grad.arrays(m$rnn.exec, grad.arrays, match.name=TRUE) - - } - - train.nll <- train.nll + calc.nll(as.array(seq.label.probs), batch.size) - - nbatch <- nbatch + seq.len - - if (!is.null(batch.end.callback)) { - batch.end.callback(iteration, nbatch, environment()) - } - - if ((epoch.counter %% log.period) == 0) { - message(paste0("Epoch [", epoch.counter, - "] Train: NLL=", train.nll / nbatch, - ", Perp=", exp(train.nll / nbatch))) - } - } - train.data$reset() - # end of training loop - toc <- Sys.time() - message(paste0("Iter [", iteration, - "] Train: Time: ", as.numeric(toc - tic, units="secs"), - " sec, NLL=", train.nll / nbatch, - ", Perp=", exp(train.nll / nbatch))) - - if (!is.null(eval.data)) { - val.nll <- 0.0 - # validation set, reset states - init.states <- list() - for (name in init.states.name) { - init.states[[name]] <- m$rnn.exec$ref.arg.arrays[[name]]*0 - } - mx.exec.update.arg.arrays(m$rnn.exec, init.states, match.name=TRUE) - - eval.data$reset() - nbatch <- 0 - while (eval.data$iter.next()) { - # set rnn input - rnn.input <- eval.data$value() - mx.exec.update.arg.arrays(m$rnn.exec, rnn.input, match.name=TRUE) - mx.exec.forward(m$rnn.exec, is.train=FALSE) - # probability of each label class, used to evaluate nll - seq.label.probs <- mx.nd.choose.element.0index(m$rnn.exec$ref.outputs[["sm_output"]], get.label(m$rnn.exec$ref.arg.arrays[["label"]], ctx)) - # transfer the states - init.states <- list() - for (name in init.states.name) { - init.states[[name]] <- m$rnn.exec$ref.arg.arrays[[name]]*0 - } - mx.exec.update.arg.arrays(m$rnn.exec, init.states, 
match.name=TRUE) - val.nll <- val.nll + calc.nll(as.array(seq.label.probs), batch.size) - nbatch <- nbatch + seq.len - } - eval.data$reset() - perp <- exp(val.nll / nbatch) - message(paste0("Iter [", iteration, - "] Val: NLL=", val.nll / nbatch, - ", Perp=", exp(val.nll / nbatch))) - } - # get the model out - - - epoch_continue <- TRUE - if (!is.null(epoch.end.callback)) { - epoch_continue <- epoch.end.callback(iteration, 0, environment(), verbose = verbose) - } - - if (!epoch_continue) { - break - } - } - - return (m) -} - -# check data and translate data into iterator if data is array/matrix -check.data <- function(data, batch.size, is.train) { - if (!is.null(data) && !is.list(data) && !is.mx.dataiter(data)) { - stop("The dataset should be either a mx.io.DataIter or a R list") - } - if (is.list(data)) { - if (is.null(data$data) || is.null(data$label)){ - stop("Please provide dataset as list(data=R.array, label=R.array)") - } - data <- mx.model.init.iter.rnn(data$data, data$label, batch.size=batch.size, is.train = is.train) - } - if (!is.null(data) && !data$iter.next()) { - data$reset() - if (!data$iter.next()) stop("Empty input") - } - return (data) -} diff --git a/R-package/R/viz.graph.R b/R-package/R/viz.graph.R index 7d0365b1433f..6d13de0af1d0 100644 --- a/R-package/R/viz.graph.R +++ b/R-package/R/viz.graph.R @@ -45,6 +45,7 @@ graph.viz <- function(symbol, shape=NULL, direction="TD", type="graph", graph.wi "MAERegressionOutput"=, "SVMOutput"=, "LogisticRegressionOutput"=, + "MakeLoss"=, "SoftmaxOutput" = "#b3de69", "#fccde5" # default value ) @@ -122,11 +123,14 @@ graph.viz <- function(symbol, shape=NULL, direction="TD", type="graph", graph.wi stringsAsFactors=F) edges_df$from<- id_dic[as.character(edges_df$from)] - nodes_df_new<- create_node_df(n = nrow(nodes_df), label=nodes_df$label, shape=nodes_df$shape, type="base", penwidth=2, color=nodes_df$color, style="filled", fillcolor=adjustcolor(nodes_df$color, alpha.f = 1)) - edge_df_new<- create_edge_df(from = 
edges_df$from, to=edges_df$to, color="black") + nodes_df_new<- create_node_df(n = nrow(nodes_df), label=nodes_df$label, shape=nodes_df$shape, type="base", penwidth=2, color=nodes_df$color, style="filled", + fillcolor=adjustcolor(nodes_df$color, alpha.f = 1), fontcolor = "black") + edge_df_new<- create_edge_df(from = edges_df$from, to=edges_df$to, color="black", fontcolor = "black") if (!is.null(shape)){ - edges_labels_raw<- symbol$get.internals()$infer.shape(list(data=shape))$out.shapes + if (is.list(shape)) { + edges_labels_raw<- symbol$get.internals()$infer.shape(shape)$out.shapes + } else edges_labels_raw<- symbol$get.internals()$infer.shape(list(data=shape))$out.shapes if (!is.null(edges_labels_raw)){ edge_label_str <- function(x) paste0(x, collapse="X") edges_labels_raw<- sapply(edges_labels_raw, edge_label_str) @@ -145,9 +149,6 @@ graph.viz <- function(symbol, shape=NULL, direction="TD", type="graph", graph.wi } else { graph_render<- render_graph(graph = graph, output = "graph", width = graph.width.px, height = graph.height.px) } - - # graph <-visNetwork(nodes = nodes_df, edges = edges_df, main = graph.title) %>% - # visHierarchicalLayout(direction = "UD", sortMethod = "directed") return(graph_render) } diff --git a/R-package/README.md b/R-package/README.md index 6576700e11c6..c39b2b101d2e 100644 --- a/R-package/README.md +++ b/R-package/README.md @@ -19,7 +19,7 @@ You can install the CPU package directly from the R console: ```r cran <- getOption("repos") -cran["dmlc"] <- "https://s3-us-west-2.amazonaws.com/apache-mxnet/R/CRAN/" +cran["dmlc"] <- "https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/R/CRAN/" options(repos = cran) install.packages("mxnet") ``` diff --git a/R-package/tests/testthat/get_data.R b/R-package/tests/testthat/get_data.R index 6d8de8516ae1..2676b20fa80b 100644 --- a/R-package/tests/testthat/get_data.R +++ b/R-package/tests/testthat/get_data.R @@ -19,7 +19,7 @@ GetMNIST_csv <- function() { } if (!file.exists('data/train.csv') | 
!file.exists('data/test.csv')) { - download.file('https://s3-us-west-2.amazonaws.com/apache-mxnet/R/data/mnist_csv.zip', + download.file('https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/R/data/mnist_csv.zip', destfile = 'data/mnist_csv.zip') unzip('data/mnist_csv.zip', exdir = 'data/') file.remove('data/mnist_csv.zip') @@ -61,7 +61,7 @@ GetCatDog <- function() { } if (!file.exists('data/cats_dogs/cats_dogs_train.rec') | !file.exists('data/cats_dogs/cats_dogs_val.rec')) { - download.file('https://s3-us-west-2.amazonaws.com/apache-mxnet/R/data/cats_dogs.zip', + download.file('https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/R/data/cats_dogs.zip', destfile = 'data/cats_dogs.zip') unzip('data/cats_dogs.zip', exdir = 'data/') file.remove('data/cats_dogs.zip') @@ -86,7 +86,7 @@ GetISBI_data <- function() { } if (!file.exists('data/ISBI/train-volume.tif') | !file.exists('data/ISBI/train-labels.tif')) { - download.file('https://s3-us-west-2.amazonaws.com/apache-mxnet/R/data/ISBI.zip', + download.file('https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/R/data/ISBI.zip', destfile = 'data/ISBI.zip') unzip('data/ISBI.zip', exdir = 'data/') file.remove('data/ISBI.zip') @@ -99,7 +99,7 @@ GetCaptcha_data <- function() { } if (!file.exists('data/captcha_example/captcha_train.rec') | !file.exists('data/captcha_example/captcha_test.rec')) { - download.file('https://s3-us-west-2.amazonaws.com/apache-mxnet/R/data/captcha_example.zip', + download.file('https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/R/data/captcha_example.zip', destfile = 'data/captcha_example.zip') unzip('data/captcha_example.zip', exdir = 'data/') file.remove('data/captcha_example.zip') diff --git a/R-package/tests/testthat/test_img_seg.R b/R-package/tests/testthat/test_img_seg.R index fbca92e2a8a2..b3400cd3bbc6 100644 --- a/R-package/tests/testthat/test_img_seg.R +++ b/R-package/tests/testthat/test_img_seg.R @@ -90,7 +90,7 @@ context("Image segmentation") test_that("UNET", { 
list.of.packages <- c("imager") new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])] - if(length(new.packages)) install.packages(new.packages) + if(length(new.packages)) install.packages(new.packages, repos = "https://cloud.r-project.org/") GetISBI_data() library(imager) IMG_SIZE <- 168 @@ -132,4 +132,4 @@ test_that("UNET", { learning.rate = 0.05, momentum = 0.99, array.batch.size = 2) -}) \ No newline at end of file +}) diff --git a/R-package/tests/testthat/test_lstm.R b/R-package/tests/testthat/test_lstm.R deleted file mode 100644 index 4a5cdbeb436f..000000000000 --- a/R-package/tests/testthat/test_lstm.R +++ /dev/null @@ -1,57 +0,0 @@ -require(mxnet) - -if (Sys.getenv("R_GPU_ENABLE") != "" & as.integer(Sys.getenv("R_GPU_ENABLE")) == 1) { - mx.ctx.default(new = mx.gpu()) - message("Using GPU for testing.") -} - -context("lstm models") - -get.nll <- function(s) { - pat <- ".*\\NLL=(.+), Perp=.*" - nll <- sub(pat, "\\1", s) - return (as.numeric(nll)) -} - -test_that("training error decreasing", { - - # Set basic network parameters. 
- batch.size = 2 - seq.len = 2 - num.hidden = 1 - num.embed = 2 - num.lstm.layer = 2 - num.round = 5 - learning.rate= 0.1 - wd=0.00001 - clip_gradient=1 - update.period = 1 - vocab=17 - - X.train <- list(data=array(1:16, dim=c(2,8)), label=array(2:17, dim=c(2,8))) - - s <- capture.output(model <- mx.lstm( X.train, - ctx=mx.ctx.default(), - num.round=num.round, - update.period=update.period, - num.lstm.layer=num.lstm.layer, - seq.len=seq.len, - num.hidden=num.hidden, - num.embed=num.embed, - num.label=vocab, - batch.size=batch.size, - input.size=vocab, - initializer=mx.init.uniform(0.01), - learning.rate=learning.rate, - wd=wd, - clip_gradient=clip_gradient)) - - prev.nll <- 10000000.0 - for (r in s) { - nll <- get.nll(r) - expect_true(prev.nll >= nll) - prev.nll <- nll - - } - -}) \ No newline at end of file diff --git a/R-package/vignettes/CustomIterator.Rmd b/R-package/vignettes/CustomIterator.Rmd index 22ac90fe0400..b5a6576a5bc6 100644 --- a/R-package/vignettes/CustomIterator.Rmd +++ b/R-package/vignettes/CustomIterator.Rmd @@ -10,7 +10,7 @@ The data we are going to use is the [MNIST dataset](http://yann.lecun.com/exdb/m To download the data: ```{r} -download.file('https://s3-us-west-2.amazonaws.com/apache-mxnet/R/data/mnist_csv.zip', +download.file('https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/R/data/mnist_csv.zip', destfile = 'mnist_csv.zip') unzip('mnist_csv.zip', exdir = '.') ``` diff --git a/R-package/vignettes/mnistCompetition.Rmd b/R-package/vignettes/mnistCompetition.Rmd index 988fd18e8b4d..055f1ae51d7e 100644 --- a/R-package/vignettes/mnistCompetition.Rmd +++ b/R-package/vignettes/mnistCompetition.Rmd @@ -10,7 +10,7 @@ First, let us download the data from [here](https://www.kaggle.com/c/digit-recog Then we can read them in R and convert to matrices. 
```{r, echo=FALSE} -download.file('https://s3-us-west-2.amazonaws.com/apache-mxnet/R/data/mnist_csv.zip', destfile = 'mnist_csv.zip') +download.file('https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/R/data/mnist_csv.zip', destfile = 'mnist_csv.zip') unzip('mnist_csv.zip', exdir = '.') ``` diff --git a/README.md b/README.md index a11780aa019b..6e7dc41c1e5b 100644 --- a/README.md +++ b/README.md @@ -1,28 +1,31 @@ Apache MXNet (incubating) for Deep Learning ===== -[![Build Status](https://travis-ci.org/dmlc/mxnet.svg?branch=master)](https://travis-ci.org/dmlc/mxnet) -[![Documentation Status](https://readthedocs.org/projects/mxnet/badge/?version=latest)](http://mxnet.io/) +[![Build Status](https://builds.apache.org/job/incubator-mxnet/job/master/badge/icon)](https://builds.apache.org/job/incubator-mxnet/job/master/) +[![Documentation Status](https://builds.apache.org/job/incubator-mxnet-build-site/badge/icon)](https://mxnet.incubator.apache.org/) [![GitHub license](http://dmlc.github.io/img/apache2.svg)](./LICENSE) ![banner](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/image/banner.png) Apache MXNet (incubating) is a deep learning framework designed for both *efficiency* and *flexibility*. -It allows you to ***mix*** [symbolic and imperative programming](http://mxnet.io/architecture/index.html#deep-learning-system-design-concepts) +It allows you to ***mix*** [symbolic and imperative programming](https://mxnet.incubator.apache.org/architecture/index.html#deep-learning-system-design-concepts) to ***maximize*** efficiency and productivity. At its core, MXNet contains a dynamic dependency scheduler that automatically parallelizes both symbolic and imperative operations on the fly. A graph optimization layer on top of that makes symbolic execution fast and memory efficient. MXNet is portable and lightweight, scaling effectively to multiple GPUs and multiple machines. MXNet is also more than a deep learning project. 
It is also a collection of -[blue prints and guidelines](http://mxnet.io/architecture/index.html#deep-learning-system-design-concepts) for building +[blue prints and guidelines](https://mxnet.incubator.apache.org/architecture/index.html#deep-learning-system-design-concepts) for building deep learning systems, and interesting insights of DL systems for hackers. [![Join the chat at https://gitter.im/dmlc/mxnet](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/dmlc/mxnet?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) What's New ---------- -* [Version 0.11.0-rc2 Release](https://github.com/apache/incubator-mxnet/releases/tag/0.11.0.rc2) - MXNet 0.11.0-rc2 Release. +* [Version 1.0.0 Release](https://github.com/apache/incubator-mxnet/releases/tag/1.0.0) - MXNet 1.0.0 Release. +* [Version 0.12.1 Release](https://github.com/apache/incubator-mxnet/releases/tag/0.12.1) - MXNet 0.12.1 Patch Release. +* [Version 0.12.0 Release](https://github.com/apache/incubator-mxnet/releases/tag/0.12.0) - MXNet 0.12.0 Release. +* [Version 0.11.0 Release](https://github.com/apache/incubator-mxnet/releases/tag/0.11.0) - MXNet 0.11.0 Release. * [Apache Incubator](http://incubator.apache.org/projects/mxnet.html) - We are now an Apache Incubator project. * [Version 0.10.0 Release](https://github.com/dmlc/mxnet/releases/tag/v0.10.0) - MXNet 0.10.0 Release. * [Version 0.9.3 Release](./docs/architecture/release_note_0_9.md) - First 0.9 official release. 
@@ -33,26 +36,26 @@ What's New * [MKLDNN for Faster CPU Performance](./MKL_README.md) * [MXNet Memory Monger, Training Deeper Nets with Sublinear Memory Cost](https://github.com/dmlc/mxnet-memonger) * [Tutorial for NVidia GTC 2016](https://github.com/dmlc/mxnet-gtc-tutorial) -* [Embedding Torch layers and functions in MXNet](http://mxnet.io/how_to/torch.html) +* [Embedding Torch layers and functions in MXNet](https://mxnet.incubator.apache.org/how_to/torch.html) * [MXNet.js: Javascript Package for Deep Learning in Browser (without server) ](https://github.com/dmlc/mxnet.js/) -* [Design Note: Design Efficient Deep Learning Data Loading Module](http://mxnet.io/architecture/note_data_loading.html) -* [MXNet on Mobile Device](http://mxnet.io/how_to/smart_device.html) -* [Distributed Training](http://mxnet.io/how_to/multi_devices.html) -* [Guide to Creating New Operators (Layers)](http://mxnet.io/how_to/new_op.html) +* [Design Note: Design Efficient Deep Learning Data Loading Module](https://mxnet.incubator.apache.org/architecture/note_data_loading.html) +* [MXNet on Mobile Device](https://mxnet.incubator.apache.org/how_to/smart_device.html) +* [Distributed Training](https://mxnet.incubator.apache.org/how_to/multi_devices.html) +* [Guide to Creating New Operators (Layers)](https://mxnet.incubator.apache.org/how_to/new_op.html) * [Go binding for inference](https://github.com/songtianyi/go-mxnet-predictor) * [Amalgamation and Go Binding for Predictors](https://github.com/jdeng/gomxnet/) - Outdated -* [Training Deep Net on 14 Million Images on A Single Machine](http://mxnet.io/tutorials/computer_vision/imagenet_full.html) +* [Large Scale Image Classification](https://github.com/apache/incubator-mxnet/tree/master/example/image-classification) Contents -------- -* [Documentation](http://mxnet.io/) and [Tutorials](http://mxnet.io/tutorials/) -* [Design Notes](http://mxnet.io/architecture/index.html) +* [Documentation](https://mxnet.incubator.apache.org/) and 
[Tutorials](https://mxnet.incubator.apache.org/tutorials/) +* [Design Notes](https://mxnet.incubator.apache.org/architecture/index.html) * [Code Examples](https://github.com/dmlc/mxnet/tree/master/example) -* [Installation](http://mxnet.io/get_started/install.html) +* [Installation](https://mxnet.incubator.apache.org/get_started/install.html) * [Pretrained Models](https://github.com/dmlc/mxnet-model-gallery) -* [Contribute to MXNet](http://mxnet.io/community/contribute.html) -* [Frequent Asked Questions](http://mxnet.io/how_to/faq.html) +* [Contribute to MXNet](https://mxnet.incubator.apache.org/community/contribute.html) +* [Frequent Asked Questions](https://mxnet.incubator.apache.org/how_to/faq.html) Features -------- @@ -70,7 +73,7 @@ Ask Questions License ------- -© Contributors, 2015-2017. Licensed under an [Apache-2.0](https://github.com/dmlc/mxnet/blob/master/LICENSE) license. +Licensed under an [Apache-2.0](https://github.com/dmlc/mxnet/blob/master/LICENSE) license. Reference Paper --------------- diff --git a/amalgamation/amalgamation.py b/amalgamation/amalgamation.py index 22b421d79fba..2aba8f4bdc77 100644 --- a/amalgamation/amalgamation.py +++ b/amalgamation/amalgamation.py @@ -32,6 +32,10 @@ minimum = int(sys.argv[6]) if len(sys.argv) > 5 else 0 android = int(sys.argv[7]) if len(sys.argv) > 6 else 0 +# blacklist linear algebra headers when building without blas. +if minimum != 0: + blacklist.append('linalg.h') + def pprint(lst): for item in lst: print item diff --git a/amalgamation/dmlc-minimum0.cc b/amalgamation/dmlc-minimum0.cc index 3f7a97bb0139..be1793a51d7c 100644 --- a/amalgamation/dmlc-minimum0.cc +++ b/amalgamation/dmlc-minimum0.cc @@ -18,11 +18,13 @@ */ /*! + * Copyright 2015 by Contributors. * \brief Mininum DMLC library Amalgamation, used for easy plugin of dmlc lib. * Normally this is not needed. 
*/ #include "../dmlc-core/src/io/line_split.cc" #include "../dmlc-core/src/io/recordio_split.cc" +#include "../dmlc-core/src/io/indexed_recordio_split.cc" #include "../dmlc-core/src/io/input_split_base.cc" #include "../dmlc-core/src/io/local_filesys.cc" #include "../dmlc-core/src/data.cc" diff --git a/amalgamation/mxnet_predict0.cc b/amalgamation/mxnet_predict0.cc index badf23771dbc..053dc1e7a691 100644 --- a/amalgamation/mxnet_predict0.cc +++ b/amalgamation/mxnet_predict0.cc @@ -45,9 +45,11 @@ #include "src/ndarray/ndarray_function.cc" -#include "src/ndarray/autograd.cc" #include "src/ndarray/ndarray.cc" +#include "src/imperative/imperative.cc" +#include "src/imperative/cached_op.cc" + #include "src/engine/engine.cc" #include "src/engine/naive_engine.cc" #include "src/engine/profiler.cc" @@ -74,9 +76,11 @@ #include "src/operator/softmax_activation.cc" #include "src/operator/softmax_output.cc" #include "src/operator/tensor/elemwise_binary_broadcast_op_basic.cc" +#include "src/operator/tensor/elemwise_binary_op.cc" #include "src/operator/tensor/elemwise_binary_op_basic.cc" #include "src/operator/tensor/elemwise_binary_scalar_op_basic.cc" -#include "src/operator/tensor/elemwise_unary_op.cc" +#include "src/operator/tensor/elemwise_unary_op_basic.cc" +#include "src/operator/tensor/elemwise_unary_op_trig.cc" #include "src/operator/tensor/matrix_op.cc" #include "src/storage/storage.cc" @@ -88,3 +92,4 @@ #include "src/c_api/c_api_symbolic.cc" #include "src/c_api/c_api_ndarray.cc" #include "src/c_api/c_api_error.cc" + diff --git a/amalgamation/prep_nnvm.sh b/amalgamation/prep_nnvm.sh index baf6d4d2d0a7..60c96743307c 100755 --- a/amalgamation/prep_nnvm.sh +++ b/amalgamation/prep_nnvm.sh @@ -1,4 +1,20 @@ #! /bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. DMLC_CORE=$(pwd)/../dmlc-core cd ../nnvm/amalgamation make clean diff --git a/benchmark/python/sparse/cast_storage.py b/benchmark/python/sparse/cast_storage.py new file mode 100644 index 000000000000..7ae537398c42 --- /dev/null +++ b/benchmark/python/sparse/cast_storage.py @@ -0,0 +1,99 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import ctypes + +from mxnet.test_utils import * +import os +import time +import argparse + +from mxnet.base import check_call, _LIB + +parser = argparse.ArgumentParser(description="Benchmark cast storage operators", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument('--num-omp-threads', type=int, default=1, help='number of omp threads to set in MXNet') +args = parser.parse_args() + +def measure_cost(repeat, f, *args, **kwargs): + start = time.time() + results = [] + for i in range(repeat): + (f(*args, **kwargs)).wait_to_read() + end = time.time() + diff = end - start + return diff / repeat + + +def run_cast_storage_synthetic(): + def dense_to_sparse(m, n, density, ctx, repeat, stype): + set_default_context(ctx) + data_shape = (m, n) + dns_data = rand_ndarray(data_shape, stype, density).tostype('default') + dns_data.wait_to_read() + + # do one warm up run, verify correctness + assert same(mx.nd.cast_storage(dns_data, stype).asnumpy(), dns_data.asnumpy()) + + # start benchmarking + cost = measure_cost(repeat, mx.nd.cast_storage, dns_data, stype) + results = '{:10.1f} {:>10} {:8d} {:8d} {:10.2f}'.format(density*100, str(ctx), m, n, cost*1000) + print(results) + + check_call(_LIB.MXSetNumOMPThreads(ctypes.c_int(args.num_omp_threads))) + + # params + # m number of rows + # n number of columns + # density density of the matrix + # num_repeat number of benchmark runs to average over + # contexts mx.cpu(), mx.gpu() + # note: benchmark different contexts separately; to benchmark cpu, compile without CUDA + # benchmarks dns_to_csr, dns_to_rsp + m = [ 512, 512] + n = [50000, 100000] + density = [1.00, 0.80, 0.60, 0.40, 0.20, 0.10, 0.05, 0.02, 0.01] + num_repeat = 10 + contexts = [mx.gpu()] + benchmarks = ["dns_to_csr", "dns_to_rsp"] + + # run benchmark + for b in benchmarks: + stype = '' + print("==================================================") + if b is "dns_to_csr": + stype = 'csr' + print(" cast_storage benchmark: dense to csr, size m x n 
") + elif b is "dns_to_rsp": + stype = 'row_sparse' + print(" cast_storage benchmark: dense to rsp, size m x n ") + else: + print("invalid benchmark: %s" %b) + continue + print("==================================================") + headline = '{:>10} {:>10} {:>8} {:>8} {:>10}'.format('density(%)', 'context', 'm', 'n', 'time(ms)') + print(headline) + for i in range(len(n)): + for ctx in contexts: + for den in density: + dense_to_sparse(m[i], n[i], den, ctx, num_repeat, stype) + print("") + print("") + + +if __name__ == "__main__": + run_cast_storage_synthetic() diff --git a/benchmark/python/sparse/dot.py b/benchmark/python/sparse/dot.py new file mode 100644 index 000000000000..164e50aef051 --- /dev/null +++ b/benchmark/python/sparse/dot.py @@ -0,0 +1,445 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import ctypes + +import os +import time +import argparse +import subprocess +import scipy.sparse as sp + +import mxnet as mx +import numpy as np +import numpy.random as rnd +from mxnet.test_utils import rand_ndarray, set_default_context, assert_almost_equal, get_bz2_data +from mxnet.base import check_call, _LIB +from util import estimate_density + +PARSER = argparse.ArgumentParser(description="Benchmark sparse operators", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) +PARSER.add_argument('--num-omp-threads', type=int, + default=1, help='number of omp threads to set in MXNet') +PARSER.add_argument('--gpu', action='store_true', + help="to be run on gpu") +# TODO: Use logging later +PARSER.add_argument('--verbose', action='store_true', + help="Verbose output") +ARGS = PARSER.parse_args() + +# some data information +KDDA = { + 'data_mini': 'kdda.t.mini', + 'data_name': 'kdda.t', + 'data_origin_name': 'kdda.t.bz2', + 'url': "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/kdda.t.bz2", + 'feature_dim': 20216830, + 'm': [1, 8, 32], + 'batch_size': [64], + 'default_index': {'batch_size': 0, + 'output_dim': 2}, + 'num_batches': 10 +} + +AVAZU = { + 'data_mini': 'avazu-app.t.mini', + 'data_name': 'avazu-app.t', + 'data_origin_name': 'avazu-app.t.bz2', + 'url': "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/avazu-app.t.bz2", + 'feature_dim': 1000000, + 'm': [1, 1000, 2000], + 'batch_size': [128, 256], + 'default_index': {'batch_size': 0, + 'output_dim': 1}, + 'num_batches': 10 +} + +CRITEO = { + 'data_mini': 'criteo.t.mini', + 'data_name': 'criteo.t', + 'data_origin_name': 'criteo.t.bz2', + 'url' : "https://s3-us-west-2.amazonaws.com/sparse-dataset/criteo.t.bz2", + 'feature_dim': 8388621, + 'm': [1, 8, 16, 32, 64], + 'batch_size': [64, 128], + 'default_index': {'batch_size': 1, + 'output_dim': 3}, + 'num_batches': 10 +} + +SYNTHETIC1 = { + 'feature_dim': [1000000], + 'm': [256, 1000], + 'density': [0.001, 0.005, 0.01, 0.02, 0.05, 
+ 0.1, 0.2, 0.5, 0.65], + 'batch_size': [64, 128], + 'default_index': {'batch_size': 1, + 'density': 2, + 'output_dim': 1, + 'feature_dim': 0}, + 'num_repeat': 10 +} + +SYNTHETIC2 = { + 'feature_dim': [8000000, 16000000], + 'm': [1, 32], + 'density': [0.001, 0.005, 0.01, 0.02, 0.05, + 0.1, 0.2, 0.5, 0.65], + 'batch_size': [64, 128], + 'default_index': {'batch_size': 1, + 'density': 2, + 'output_dim': 1, + 'feature_dim': 0}, + 'num_repeat': 10 +} + +def measure_cost(repeat, scipy_trans_lhs, scipy_dns_lhs, func_name, *args, **kwargs): + """Measure time cost of running a function + """ + mx.nd.waitall() + args_list = [] + for arg in args: + args_list.append(arg) + start = time.time() + if scipy_trans_lhs: + args_list[0] = np.transpose(args_list[0]) if scipy_dns_lhs else sp.spmatrix.transpose(args_list[0]) + for _ in range(repeat): + func_name(*args_list, **kwargs) + mx.nd.waitall() + end = time.time() + diff = end - start + return diff / repeat + + +def _get_iter(path, data_shape, batch_size): + data_train = mx.io.LibSVMIter(data_libsvm=path, + data_shape=data_shape, + batch_size=batch_size) + data_iter = iter(data_train) + return data_iter + + +def _line_count(path): + return int(subprocess.check_output('wc -l {}'.format(path), shell=True).split()[0]) + + +def _compare_sparse_dense(data_dir, file_name, mini_file_name, feature_dim, + output_dim, density, batch_size, num_batches=3, num_repeat=5, transpose=False, + rsp=False): + + def create_mini_path(mini_path, path, num_batches): + """Samples batches of size: batch_size, total number: num_batches + from the dataset files for running benchmarks""" + if not os.path.exists(mini_path): + last = _line_count(path) - num_batches * batch_size + last = last if last >= 1 else 1 + start = int(rnd.uniform(1, last)) + os.system("sed -n '%d,%dp' %r > %r" + %(start, start + num_batches * batch_size, path, mini_path)) + assert os.path.exists(mini_path) + + + def run_benchmark(mini_path): + """Run benchmarks + """ + data_shape = 
(feature_dim, ) + train_iter = _get_iter(mini_path, data_shape, batch_size) + weight_row_dim = batch_size if transpose else feature_dim + weight_shape = (weight_row_dim, output_dim) + if not rsp: + weight = mx.nd.random.uniform(low=0, high=1, shape=weight_shape) + else: + weight = rand_ndarray(weight_shape, "row_sparse", density=0.05, distribution="uniform") + total_cost = {} + average_cost = {} + count = 0 + total_cost["sparse"] = 0. + total_cost["dense"] = 0. + for _ in train_iter: + csr_data = train_iter.getdata() + dns_data = csr_data.tostype('default') + cost_sparse = measure_cost(num_repeat, False, False, mx.nd.sparse.dot, csr_data, weight, transpose_a=transpose) + cost_dense = measure_cost(num_repeat, False, False, mx.nd.dot, dns_data, weight, transpose_a=transpose) + total_cost["sparse"] += cost_sparse + total_cost["dense"] += cost_dense + count = count + 1 + average_cost["sparse"] = total_cost["sparse"] / count + average_cost["dense"] = total_cost["dense"] / count + return (average_cost["sparse"], average_cost["dense"]) + + + def print_result(average_cost_sparse, average_cost_dense): + """Print result of comparison between sparse and dense + """ + ratio = average_cost_dense / average_cost_sparse + fmt = '{:15.4f} {:10d} {:10d} {:10d} {:20.2f} {:15.2f} {:15.2f} {:10} {:10}' + print(fmt.format(density * 100, batch_size, output_dim, feature_dim, + ratio, average_cost_dense*1000, average_cost_sparse*1000, + transpose, rsp)) + + mini_path = os.path.join(data_dir, mini_file_name) + path = os.path.join(data_dir, file_name) + create_mini_path(mini_path, path, num_batches) + average_cost_sparse, average_cost_dense = run_benchmark(mini_path) + print_result(average_cost_sparse, average_cost_dense) + + +def test_dot_real(data_dict): + """Dot operator testing with real datasets""" + data_dir = os.path.join(os.getcwd(), 'data') + + path = os.path.join(data_dir, data_dict['data_name']) + if not os.path.exists(path): + get_bz2_data( + data_dir, + data_dict['data_name'], + 
data_dict['url'], + data_dict['data_origin_name'] + ) + assert os.path.exists(path) + + k = data_dict['feature_dim'] + m = data_dict['m'] + batch_size_list = data_dict['batch_size'] + + default_output_index = data_dict['default_index']['output_dim'] + default_batch_size_index = data_dict['default_index']['batch_size'] + density = estimate_density(path, data_dict['feature_dim']) + num_batches = data_dict['num_batches'] + + assert default_batch_size_index < len(batch_size_list) + assert default_output_index < len(m) + if ARGS.verbose: + print("Running Benchmarking on %r data") % data_dict['data_mini'] + print('{:>15} {:>10} {:>10} {:>10} {:>20} {:>15} {:>15} {:>10} {:>10}'.format('density(%)', + 'n', + 'm', + 'k', + 't_dense/t_sparse', + 't_dense(ms)', + 't_sparse(ms)', + 'is_transpose', + 'rhs_rsp')) + + + for output_dim in m: + _compare_sparse_dense(data_dir, data_dict['data_name'], data_dict['data_mini'], + k, output_dim, density, + batch_size_list[default_batch_size_index], num_batches) + _compare_sparse_dense(data_dir, data_dict['data_name'], data_dict['data_mini'], + k, output_dim, density, + batch_size_list[default_batch_size_index], num_batches, + transpose=True) + _compare_sparse_dense(data_dir, data_dict['data_name'], data_dict['data_mini'], + k, output_dim, density, + batch_size_list[default_batch_size_index], num_batches, rsp=True) + + for batch_size in batch_size_list: + _compare_sparse_dense(data_dir, data_dict['data_name'], data_dict['data_mini'], + k, m[default_output_index], density, batch_size, num_batches) + _compare_sparse_dense(data_dir, data_dict['data_name'], data_dict['data_mini'], + k, m[default_output_index], density, batch_size, num_batches, + transpose=True) + _compare_sparse_dense(data_dir, data_dict['data_name'], data_dict['data_mini'], + k, output_dim, density, + batch_size_list[default_batch_size_index], num_batches, rsp=True) + + +def test_dot_synthetic(data_dict): + """benchmark sparse mxnet dot and scipy dot operator with matrices 
of given density. + `t_sparse` is the runtime of the invoked sparse dot operator in ms, while `t_dense` is the + runtime of dot(dns, dns), with the same matrices except that they are in default storage type. + """ + # Benchmark MXNet and Scipys dot operator + def bench_dot(lhs_shape, rhs_shape, lhs_stype, rhs_stype, + lhs_den, rhs_den, trans_lhs, ctx, num_repeat=10, fw="mxnet", distribution="uniform"): + set_default_context(ctx) + assert fw == "mxnet" or fw == "scipy" + # Set funcs + dot_func_sparse = mx.nd.sparse.dot if fw == "mxnet" else sp.spmatrix.dot + dot_func_dense = mx.nd.dot if fw == "mxnet" else np.dot + # Create matrix instances + lhs_nd = rand_ndarray(lhs_shape, lhs_stype, density=lhs_den, distribution=distribution) + # only uniform distribution supported for rhs + rhs_nd = rand_ndarray(rhs_shape, rhs_stype, density=rhs_den, distribution="uniform") + lhs_dns = None + rhs_dns = None + dense_cost = None + sparse_cost = None + + if fw == "mxnet": + lhs_dns = lhs_nd if lhs_stype == 'default' else lhs_nd.tostype('default') + rhs_dns = rhs_nd if rhs_stype == 'default' else rhs_nd.tostype('default') + # One warm up run, verify correctness + out = dot_func_sparse(lhs_nd, rhs_dns, trans_lhs) + out_expected = dot_func_dense(lhs_dns, rhs_dns, trans_lhs) + assert_almost_equal(out.asnumpy(), out_expected.asnumpy(), rtol=1e-1, atol=1e-1) + sparse_cost = measure_cost(num_repeat, False, False, dot_func_sparse, lhs_nd, rhs_nd, trans_lhs) + dense_cost = measure_cost(num_repeat, False, False, dot_func_dense, lhs_dns, rhs_dns, trans_lhs) + else: + lhs_dns = lhs_nd.asnumpy() + rhs_dns = rhs_nd.asnumpy() + lhs_nd = sp.csr_matrix(lhs_nd.asnumpy()) + rhs_nd = rhs_nd.asnumpy() + # One warm up run, verify correctness + lhs_nd_copy = sp.spmatrix.transpose(lhs_nd) if trans_lhs else lhs_nd + out = dot_func_sparse(lhs_nd_copy, rhs_dns) + sparse_cost = measure_cost(num_repeat, trans_lhs, False, dot_func_sparse, lhs_nd, rhs_nd) + dense_cost = measure_cost(num_repeat, trans_lhs, True, 
dot_func_dense, lhs_dns, rhs_dns) + + speedup = dense_cost / sparse_cost + # Print results + m = lhs_shape[0] + k = lhs_shape[1] + n = rhs_shape[1] + result_pattern = '{:15.1f} {:15.1f} {:>10} {:8d} {:8d} {:8d} {:13.2f} {:13.2f} {:8.2f}' + results = result_pattern.format(lhs_den*100, + rhs_den*100, + str(ctx), + m, + k, + n, + sparse_cost*1000, + dense_cost*1000, + speedup) + print(results) + + def print_benchmark_info(lhs, rhs, lhs_trans, fw): + trans_str = "^T" if lhs_trans else "" + print("========================================================") + print(" %s sparse dot benchmark: dot(%s, %s) = %s ") % (fw, lhs, rhs, rhs) + print(" (matrix multiplication: (m x k)%s * (k x n) = m x n) ") % (trans_str) + print("========================================================") + headline_pattern = '{:>15} {:>15} {:>10} {:>8} {:>8} {:>8} {:>13} {:>13} {:>8}' + headline = headline_pattern.format('lhs_density(%)', + 'rhs_density(%)', + 'context', + 'm', 'k', 'n', + 't_sparse(ms)', + 't_dense(ms)', + 'speedup') + print(headline) + + + def run_benchmark(ctx=None, lhs="csr", lhs_trans=False, rhs="dns", fw="mxnet", rhs_density=1, + distribution="uniform"): + if lhs != "csr": + raise ValueError("Value other than csr for lhs not supported") + if rhs_density > 1 or rhs_density < 0: + raise ValueError("rhs_density has to be between 0 and 1") + + print_benchmark_info(lhs, rhs, lhs_trans, fw) + + + lhs_stype = "csr" + rhs_stype = "row_sparse" if rhs == "rsp" else "default" + + feature_dim_list = data_dict['feature_dim'] + output_dim_list = data_dict['m'] + batch_size_list = data_dict['batch_size'] + density_list = data_dict['density'] + + default_output_index = data_dict['default_index']['output_dim'] + default_batch_size_index = data_dict['default_index']['batch_size'] + default_feature_index = data_dict['default_index']['feature_dim'] + default_density_index = data_dict['default_index']['density'] + num_repeat = data_dict['num_repeat'] + + for output_dim in output_dim_list: + if 
lhs_trans: + output_row_dim = batch_size_list[default_batch_size_index] + else: + output_row_dim = feature_dim_list[default_feature_index] + bench_dot((batch_size_list[default_batch_size_index], + feature_dim_list[default_feature_index]), + (output_row_dim, output_dim), + lhs_stype, rhs_stype, + density_list[default_density_index], rhs_density, + lhs_trans, ctx, num_repeat=num_repeat, + fw=fw, distribution=distribution) + + for feature_dim in feature_dim_list: + if lhs_trans: + output_row_dim = batch_size_list[default_batch_size_index] + else: + output_row_dim = feature_dim + bench_dot((batch_size_list[default_batch_size_index], feature_dim), + (output_row_dim, output_dim_list[default_output_index]), + lhs_stype, rhs_stype, density_list[default_density_index], rhs_density, + lhs_trans, ctx, num_repeat=num_repeat, fw=fw, distribution=distribution) + + for batch_size in batch_size_list: + if lhs_trans: + output_row_dim = batch_size + else: + output_row_dim = feature_dim_list[default_feature_index] + bench_dot((batch_size, feature_dim_list[default_feature_index]), + (output_row_dim, + output_dim_list[default_output_index]), + lhs_stype, rhs_stype, density_list[default_density_index], + rhs_density, lhs_trans, ctx, num_repeat=num_repeat, + fw=fw, distribution=distribution) + + for density in density_list: + if lhs_trans: + output_row_dim = batch_size_list[default_batch_size_index] + else: + output_row_dim = feature_dim_list[default_feature_index] + bench_dot((batch_size_list[default_batch_size_index], + feature_dim_list[default_feature_index]), + (output_row_dim, + output_dim_list[default_output_index]), + lhs_stype, rhs_stype, density, rhs_density, lhs_trans, ctx, + num_repeat=num_repeat, fw=fw, distribution=distribution) + + check_call(_LIB.MXSetNumOMPThreads(ctypes.c_int(ARGS.num_omp_threads))) + context = mx.gpu() if ARGS.gpu else mx.cpu() + # TODO(anirudh): make the data dicts to config which can be passed at runtime + distributions = ["uniform", "powerlaw"] + for 
distribution in distributions: + run_benchmark(context, lhs="csr", + rhs="default", lhs_trans=False, + fw="mxnet", rhs_density=1, + distribution=distribution) + run_benchmark(context, lhs="csr", + rhs="default", lhs_trans=True, + fw="mxnet", rhs_density=1, + distribution=distribution) + run_benchmark(context, lhs="csr", + rhs="rsp", lhs_trans=False, + fw="mxnet", rhs_density=0.05, + distribution=distribution) + if not ARGS.gpu: + run_benchmark(context, lhs="csr", + rhs="default", lhs_trans=False, + fw="scipy", rhs_density=1, + distribution=distribution) + run_benchmark(context, lhs="csr", + rhs="default", lhs_trans=True, + fw="scipy", rhs_density=1, + distribution=distribution) + + +if __name__ == "__main__": + begin_time = time.time() + test_dot_real(KDDA) + test_dot_real(AVAZU) + test_dot_real(CRITEO) + test_dot_synthetic(SYNTHETIC1) + test_dot_synthetic(SYNTHETIC2) + total_time = time.time() - begin_time + print("total time is %f") % total_time diff --git a/benchmark/python/sparse/memory_benchmark.py b/benchmark/python/sparse/memory_benchmark.py new file mode 100644 index 000000000000..b60f214ec79e --- /dev/null +++ b/benchmark/python/sparse/memory_benchmark.py @@ -0,0 +1,93 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+"""Should be run with valgrind to get memory consumption + for sparse format storage and dot operators. This script can be + used for memory benchmarking on CPU only""" +import ctypes +import sys +import argparse +import mxnet as mx +from mxnet.test_utils import rand_ndarray +from mxnet.base import check_call, _LIB + + +def parse_args(): + """ Function to parse arguments + """ + parser = argparse.ArgumentParser() + parser.add_argument("--lhs-row-dim", + required=True, + help="Provide batch_size") + parser.add_argument("--lhs-col-dim", + required=True, + help="Provide feature_dim") + parser.add_argument("--rhs-col-dim", + required=True, + help="Provide output_dim") + parser.add_argument("--density", + required=True, + help="Density for lhs") + parser.add_argument("--num-omp-threads", type=int, + default=1, help="number of omp threads to set in MXNet") + parser.add_argument("--lhs-stype", default="csr", + choices=["csr", "default", "row_sparse"], + help="stype for lhs", + required=True) + parser.add_argument("--rhs-stype", default="default", + choices=["default", "row_sparse"], + help="rhs stype", + required=True) + parser.add_argument("--only-storage", + action="store_true", + help="only storage") + parser.add_argument("--rhs-density", + help="rhs_density") + return parser.parse_args() + + +def main(): + args = parse_args() + lhs_row_dim = int(args.lhs_row_dim) + lhs_col_dim = int(args.lhs_col_dim) + rhs_col_dim = int(args.rhs_col_dim) + density = float(args.density) + lhs_stype = args.lhs_stype + rhs_stype = args.rhs_stype + if args.rhs_density: + rhs_density = float(args.rhs_density) + else: + rhs_density = density + dot_func = mx.nd.sparse.dot if lhs_stype == "csr" else mx.nd.dot + check_call(_LIB.MXSetNumOMPThreads(ctypes.c_int(args.num_omp_threads))) + bench_dot(lhs_row_dim, lhs_col_dim, rhs_col_dim, density, + rhs_density, dot_func, False, lhs_stype, rhs_stype, args.only_storage) + +def bench_dot(lhs_row_dim, lhs_col_dim, rhs_col_dim, density, + rhs_density, 
dot_func, trans_lhs, lhs_stype, + rhs_stype, only_storage, distribution="uniform"): + """ Benchmarking both storage and dot + """ + lhs_nd = rand_ndarray((lhs_row_dim, lhs_col_dim), lhs_stype, density, distribution=distribution) + if not only_storage: + rhs_nd = rand_ndarray((lhs_col_dim, rhs_col_dim), rhs_stype, + density=rhs_density, distribution=distribution) + out = dot_func(lhs_nd, rhs_nd, trans_lhs) + mx.nd.waitall() + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/benchmark/python/sparse/sparse_end2end.py b/benchmark/python/sparse/sparse_end2end.py new file mode 100644 index 000000000000..ecd9057dedfc --- /dev/null +++ b/benchmark/python/sparse/sparse_end2end.py @@ -0,0 +1,307 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import time +import argparse +import os +import multiprocessing +from mxnet.test_utils import * + +MAX_NUM_BATCH = 99999999 +COMP = "compute" +COMM = "communication" +IO = "io" + +parser = argparse.ArgumentParser(description="Run sparse linear regression " \ + "with distributed kvstore", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument('--profiler', type=int, default=0, + help='whether to use profiler') +parser.add_argument('--num-epoch', type=int, default=1, + help='number of epochs to train') +parser.add_argument('--batch-size', type=int, default=512, + help='number of examples per batch') +parser.add_argument('--num-batch', type=int, default=MAX_NUM_BATCH, + help='number of batches per epoch') +parser.add_argument('--dummy-iter', type=int, default=0, + help='whether to use dummy iterator to exclude io cost') +parser.add_argument('--kvstore', type=str, default=None, + help='what kvstore to use [local, dist_sync, etc]') +parser.add_argument('--sparse-log-level', type=str, default='DEBUG', + help='logging level [DEBUG, INFO, ERROR]') +parser.add_argument('--dataset', type=str, default='avazu', + help='what test dataset to use') +parser.add_argument('--num-gpu', type=int, default=0, + help='number of gpus to use. 
0 means using cpu(0);' + 'otherwise, use gpu(0),...,gpu(num_gpu-1)') +parser.add_argument('--output-dim', type=int, default=4, + help='number of columns of the forward output') +parser.add_argument('--dummy-metric', type=int, default=0, + help='whether to call update_metric') +parser.add_argument('--enable-logging-for', default="0", + help="Enable logging for the specified list of workers") +parser.add_argument('--measure-only', default=None, + help="Measure only", + choices=[IO, COMP, COMM]) +parser.add_argument('--omit-row-sparse-push', action='store_true', + help="omit row_sparse_push") + +class DummyIter(mx.io.DataIter): + "A dummy iterator that always return the same batch, used for speed testing" + def __init__(self, real_iter): + super(DummyIter, self).__init__() + self.real_iter = real_iter + self.provide_data = real_iter.provide_data + self.provide_label = real_iter.provide_label + self.batch_size = real_iter.batch_size + + for batch in real_iter: + self.the_batch = batch + break + + def __iter__(self): + return self + + def next(self): + return self.the_batch + +# testing dataset sources +avazu = { + 'data_name': 'avazu-app.t', + 'data_origin_name': 'avazu-app.t.bz2', + 'url': "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/avazu-app.t.bz2", + 'feature_dim': 1000001, + 'lc': 1719304, +} + +kdda = { + 'data_name': 'kdda.t', + 'data_origin_name': 'kdda.t.bz2', + 'url': "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/kdda.t.bz2", + 'feature_dim': 20216831, + 'lc': 510302, +} + +criteo = { + 'data_name': 'criteo.t', + 'data_origin_name': 'criteo.t.bz2', + 'url': "https://s3-us-west-2.amazonaws.com/sparse-dataset/criteo.t.bz2", + 'feature_dim': 8388621, + 'lc': 548787, +} + +datasets = { 'kdda' : kdda, 'avazu' : avazu , 'criteo': criteo } + + +def get_sym(feature_dim): + inputs = mx.symbol.Variable("data", stype='csr') + norm_init = mx.initializer.Normal(sigma=0.01) + weights = mx.symbol.Variable("w", shape=(feature_dim, 
args.output_dim), + init=norm_init, stype='row_sparse') + embed = mx.symbol.sparse.dot(inputs, weights) + softmax_output = mx.symbol.Variable("softmax_label") + model = mx.symbol.SoftmaxOutput(data=embed, label=softmax_output, name="out") + return model + + +def row_sparse_push(kv, param_arrays, grad_arrays, param_names): + for index, pair in enumerate(zip(param_arrays, grad_arrays)): + arg_list, grad_list = pair + if grad_list[0] is None: + continue + name = param_names[index] + kv.push(name, grad_list, priority=-index) + + +def row_sparse_pull(kv, key, data, slices, weight_array, priority): + # if have kvstore, need to pull corresponding rows of + # the weights to each context + # column indices (NDArray type) of the csr data + # used as the row_idx of the weight row-sparse matrix + row_indices = data.indices + if len(slices) == 1: + kv.row_sparse_pull(key, weight_array, priority=priority, row_ids=row_indices) + else: # more than one slices, multi-GPU training. Need to retain weight rows according to data slices + # TODO(junwu): + # the following line blocks, may need to pre-compute + # and cache it outside the for loop + indptr = data.indptr.asnumpy() + row_idx_array = [] + for s in slices: + row_idx_array.append(row_indices[indptr[s.start]:indptr[s.stop]]) + kv.row_sparse_pull(key, weight_array, priority=priority, row_ids=row_idx_array) + + +if __name__ == '__main__': + + # arg parser + args = parser.parse_args() + num_epoch = args.num_epoch + num_batch = args.num_batch + kvstore = args.kvstore + profiler = args.profiler > 0 + batch_size = args.batch_size if args.num_gpu == 0 else args.num_gpu * args.batch_size + dummy_iter = args.dummy_iter + dataset = args.dataset + log_level = args.sparse_log_level + measure_only = args.measure_only + num_cores = multiprocessing.cpu_count() + omit_row_sparse_push = args.omit_row_sparse_push + if measure_only == COMP or measure_only == IO: + assert not kvstore, "when compute_only or io_only is set, kvstore should be None" + 
num_batch = datasets[dataset]['lc'] / batch_size if num_batch == MAX_NUM_BATCH else num_batch + if measure_only == COMM: + assert (kvstore == "dist_async"), "when communication_only is set kvstore should be dist_async" + num_batch = datasets[dataset]['lc'] / batch_size if num_batch == MAX_NUM_BATCH else num_batch + + + contexts = mx.context.cpu(0) if args.num_gpu < 1\ + else [mx.context.gpu(i) for i in range(args.num_gpu)] + + # create kvstore when there are gpus + kv = mx.kvstore.create(kvstore) if kvstore else None + rank = kv.rank if kv is not None else 0 + num_worker = kv.num_workers if kv is not None else 1 + + # only print log for rank 0 worker + import logging + if log_level == 'ERROR': + log_level = logging.ERROR + elif log_level == 'DEBUG': + log_level = logging.DEBUG + else: + log_level = logging.INFO + + # Only log if it is in the list of workers to be logged + logging_workers_list = [int(i) for i in args.enable_logging_for.split(",")] + log_level = log_level if rank in logging_workers_list else logging.CRITICAL + + head = '%(asctime)-15s %(message)s' + logging.basicConfig(level=log_level, format=head) + + # dataset + assert(dataset in datasets), "unknown dataset " + dataset + metadata = datasets[dataset] + feature_dim = metadata['feature_dim'] + if logging: + logging.debug('preparing data ... 
') + data_dir = os.path.join(os.getcwd(), 'data') + path = os.path.join(data_dir, metadata['data_name']) + if not os.path.exists(path): + get_bz2_data(data_dir, metadata['data_name'], metadata['url'], + metadata['data_origin_name']) + assert os.path.exists(path) + + # data iterator + train_data = mx.io.LibSVMIter(data_libsvm=path, data_shape=(feature_dim,), + batch_size=batch_size, num_parts=num_worker, + part_index=rank) + if dummy_iter or measure_only == COMP or measure_only == COMM: + train_data = DummyIter(train_data) + + # model + model = get_sym(feature_dim) + + # module + mod = mx.mod.Module(symbol=model, data_names=['data'], + label_names=['softmax_label'], context=contexts) + mod.bind(data_shapes=train_data.provide_data, label_shapes=train_data.provide_label) + mod.init_params(initializer=mx.init.Uniform(scale=.1)) + sgd = mx.optimizer.SGD(momentum=0.0, clip_gradient=5.0, + learning_rate=0.1, rescale_grad=1.0/batch_size/num_worker) + mod.init_optimizer(optimizer=sgd, kvstore=kv) + # use accuracy as the metric + metric = mx.metric.create('acc') + + index = mod._exec_group.param_names.index('w') + # weight_array bound to executors of the contexts + weight_array = mod._exec_group.param_arrays[index] + + mx.nd.waitall() # sync point for initialization + # start profiler + if profiler: + device = 'cpu' + if args.num_gpu > 0: + device = 'gpu' + str(args.num_gpu) + name = 'profile_' + args.dataset + '_' + device + '_nworker' + str(num_worker)\ + + '_batchsize' + str(args.batch_size) + '_outdim' + str(args.output_dim) + '.json' + mx.profiler.profiler_set_config(mode='all', filename=name) + mx.profiler.profiler_set_state('run') + + logging.debug('start training ...') + start = time.time() + data_iter = iter(train_data) + time_cost_epoch = 0. + sum_cost_epoch = 0. + average_cost_epoch = 0. 
+ + for epoch in range(num_epoch): + start_time_epoch = time.time() + nbatch = 0 + end_of_batch = False + metric.reset() + next_batch = next(data_iter) + if kv is not None: + row_sparse_pull(kv, 'w', next_batch.data[0], mod._exec_group.slices, weight_array, -index) + while not end_of_batch: + nbatch += 1 + batch = next_batch + + if measure_only != IO and measure_only != COMM: + mod.forward_backward(batch) + # update parameters + mod.update() + if measure_only == COMM: + if nbatch == 1: + mod.forward_backward(batch) + mod.update() + elif not omit_row_sparse_push: + row_sparse_push(kv, mod._exec_group.param_arrays, mod._exec_group.grad_arrays, mod._exec_group.param_names) + + + try: + # pre fetch next batch + next_batch = next(data_iter) + if nbatch == num_batch: + raise StopIteration + if kv is not None: + row_sparse_pull(kv, 'w', next_batch.data[0], mod._exec_group.slices, weight_array, -index) + except StopIteration: + end_of_batch = True + # accumulate prediction accuracy + if args.dummy_metric == 0: + mod.update_metric(metric, batch.label) + else: # call waitall to replace update_metric as sync point + mx.nd.waitall() # sync point for the current minibatch + logging.info('epoch {}, {}'.format(epoch, metric.get())) + end_time_epoch = time.time() + if epoch == 0: + logging.debug("num_batches = {}".format(nbatch)) + logging.info('|device|num_worker|average_cost_epoch|rank|') + time_cost_epoch = end_time_epoch - start_time_epoch + if epoch > 0: + sum_cost_epoch = sum_cost_epoch + time_cost_epoch + average_cost_epoch = float(sum_cost_epoch) / epoch + logging.info('num_worker = {}, time cost per epoch = {}'.format(str(num_worker), str(time_cost_epoch))) + if args.num_gpu < 1: + logging.info('|cpu/{} cores| {} | {} | {} |'.format(str(num_cores), str(num_worker), str(average_cost_epoch), rank)) + data_iter.reset() + if profiler: + mx.profiler.profiler_set_state('stop') + end = time.time() + time_cost = end - start + logging.info('num_worker = {}, rank = {}, time cost = 
{}'.format(str(num_worker), str(rank), str(time_cost))) diff --git a/benchmark/python/sparse/sparse_op.py b/benchmark/python/sparse/sparse_op.py new file mode 100644 index 000000000000..ebe62af05da6 --- /dev/null +++ b/benchmark/python/sparse/sparse_op.py @@ -0,0 +1,245 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import ctypes + +from mxnet.test_utils import * +import scipy.sparse as sp +import os +import time +import argparse + +from mxnet.base import check_call, _LIB +from util import get_data, estimate_density + +parser = argparse.ArgumentParser(description="Benchmark sparse operators", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument('--num-omp-threads', type=int, default=1, help='number of omp threads to set in MXNet') +args = parser.parse_args() + +# some data information +kdda = { + 'data_mini': 'kdda.t.mini', + 'data_name': 'kdda.t', + 'data_origin_name': 'kdda.t.bz2', + 'url': "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/kdda.t.bz2", + 'feature_dim': 20216830, + 'm': 200, + 'batch_size': [64] +} + +avazu = { + 'data_mini': 'avazu-app.t.mini', + 'data_name': 'avazu-app.t', + 'data_origin_name': 'avazu-app.t.bz2', + 'url': "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/avazu-app.t.bz2", + 'feature_dim': 1000000, + 'm': 500, + 'batch_size': [64, 128] +} + + +def measure_cost(repeat, f, *args, **kwargs): + # start bench + start = time.time() + results = [] + for i in range(repeat): + results.append(f(*args, **kwargs)) + for result in results: + result.wait_to_read() + end = time.time() + diff = end - start + return diff / repeat + + +def test_dot_real(data_dict): + def get_iter(path, data_shape, batch_size): + data_train = mx.io.LibSVMIter(data_libsvm=path, + data_shape=data_shape, + batch_size=batch_size) + data_iter = iter(data_train) + return data_iter + + data_dir = os.path.join(os.getcwd(), 'data') + + path = os.path.join(data_dir, data_dict['data_name']) + if not os.path.exists(path): + get_data( + data_dir, + data_dict['data_name'], + data_dict['url'], + data_dict['data_origin_name'] + ) + assert os.path.exists(path) + + k = data_dict['feature_dim'] + m = data_dict['m'] + density = estimate_density(path, data_dict['feature_dim']) + + mini_path = os.path.join(data_dir, data_dict['data_mini']) + 
if not os.path.exists(mini_path): + os.system("head -n 2000 %r > %r" % (path, mini_path)) + assert os.path.exists(mini_path) + + print "Running Benchmarking on %r data" % data_dict['data_mini'] + for batch_size in data_dict['batch_size']: # iterator through different batch size of choice + print "batch_size is %d" % batch_size + # model + data_shape = (k, ) + train_iter = get_iter(mini_path, data_shape, batch_size) + weight = mx.nd.random.uniform(low=0, high=1, shape=(k, m)) + + csr_data = [] + dns_data = [] + num_batch = 0 + for batch in train_iter: + data = train_iter.getdata() + csr_data.append(data) + dns_data.append(data.tostype('default')) + num_batch += 1 + bag_of_data = [csr_data, dns_data] + num_repeat = 5 + costs = [] + for d in bag_of_data: + weight.wait_to_read() + cost = 0. + count = 0 + for d_batch in d: + d_batch.wait_to_read() + cost += measure_cost(num_repeat, mx.nd.dot, d_batch, weight) + count += 1 + costs.append(cost/count) + t_sparse = costs[0] + t_dense = costs[1] + ratio = t_dense / t_sparse + print('density(%)\tn\tm\tk\tt_dense/t_sparse\tt_dense\tt_sparse') + fmt = "%0.4f\t\t%d\t%d\t%d\t%0.2f\t\t\t%0.4f\t%0.6f" + print(fmt % (density * 100, batch_size, m, k, ratio, t_dense, t_sparse)) + + +def test_dot_synthetic(): + """benchmark mx.nd.dot(sparse_ndarray, dense_ndarray) with given density. + `t_sparse` is the time cost of dot(csr, dns), while `t_dense` is the time cost + of dot(dns, dns), with the same matrix except that it is in default storage type. 
+ """ + def measure_cost_forward_baseline(repeat, dot, lhs, rhs): + start = time.time() + for i in range(repeat): + dot(lhs, rhs) + end = time.time() + diff = end - start + return diff / repeat + + def measure_cost_backward_baseline(repeat, dot, transpose, lhs, rhs): + start = time.time() + for i in range(repeat): + dot(transpose(lhs), rhs) + end = time.time() + diff = end - start + return diff / repeat + + def bench_dot_forward(m, k, n, density, ctx, repeat): + set_default_context(ctx) + dns = mx.nd.random.uniform(shape=(k, n)).copyto(ctx) + data_shape = (m, k) + csr_data = rand_ndarray(data_shape, 'csr', density) + dns_data = csr_data.tostype('default') + rhs_dns_np = dns.asnumpy() + lhs_csr_sp = sp.csr_matrix(dns_data.asnumpy()) # csr in scipy + lhs_dns_np = lhs_csr_sp.tostype('default') + + data = [dns_data, csr_data] + costs = [] + for d in data: + dns.wait_to_read() + d.wait_to_read() + cost = measure_cost(repeat, mx.nd.dot, d, dns) + costs.append(cost) + ratio = costs[0] / costs[1] + + costs_baseline = [] + cost = measure_cost_forward_baseline(repeat, np.dot, lhs_dns_np, rhs_dns_np) + costs_baseline.append(cost) + cost = measure_cost_forward_baseline(repeat, sp.spmatrix.dot, lhs_csr_sp, rhs_dns_np) + costs_baseline.append(cost) + ratio_baseline = costs_baseline[0] / costs_baseline[1] + fmt = "%0.1f\t\t%s\t%d\t%d\t%d\t%0.2f\t\t\t%0.2f\t%0.5f\t\t%0.2f\t\t\t\t%0.6f\t%0.5f" + print(fmt % (density * 100, str(ctx), n, m, k, ratio, costs[0], costs[1], + ratio_baseline, costs_baseline[0], costs_baseline[1])) + + def bench_dot_backward(m, k, n, density, ctx, repeat): + set_default_context(ctx) + dns = mx.nd.random.uniform(shape=(m, n)).copyto(ctx) + data_shape = (m, k) + csr_data = rand_ndarray(data_shape, 'csr', density) + dns_data = csr_data.tostype('default') + rhs_dns_np = dns.asnumpy() + lhs_csr_sp = sp.csr_matrix(dns_data.asnumpy()) + lhs_dns_np = lhs_csr_sp.tostype('default') + + data = [dns_data, csr_data] + costs = [] + for d in data: + dns.wait_to_read() + 
d.wait_to_read() + cost = measure_cost(repeat, mx.nd.dot, d, dns, transpose_a=True) + costs.append(cost) + ratio = costs[0] / costs[1] + + costs_baseline = [] + cost = measure_cost_backward_baseline(repeat, np.dot, np.transpose, lhs_dns_np, rhs_dns_np) + costs_baseline.append(cost) + cost = measure_cost_backward_baseline(repeat, sp.spmatrix.dot, sp.spmatrix.transpose, lhs_csr_sp, rhs_dns_np) + costs_baseline.append(cost) + ratio_baseline = costs_baseline[0] / costs_baseline[1] + fmt = "%0.1f\t\t%s\t%d\t%d\t%d\t%0.2f\t\t\t%0.2f\t%0.5f\t\t%0.2f\t\t\t\t%0.6f\t%0.5f" + print(fmt % (density * 100, str(ctx), n, m, k, ratio, costs[0], costs[1], + ratio_baseline, costs_baseline[0], costs_baseline[1])) + + print("A = sparse NDArray of shape(m, k)") + print("B = dense NDArray of shape(k, n)") + print("dot_forward\tdot(csr, dns)") + print('density(%)\tcontext\tn\tm\tk\tt_dense/t_sparse\tt_dense\tt_sparse' + '\tt_scipy_dense/t_scipy_sparse\tt_scipy_dense\tt_scipy_sparse') + + check_call(_LIB.MXSetNumOMPThreads(ctypes.c_int(args.num_omp_threads))) + # TODO(haibin) make these runtime options + m = 512 + k = [50000, 100000] + n = [64, 128] + density = [1.00, 0.90, 0.70, 0.50, 0.30, 0.20, 0.10, 0.07, 0.05, 0.02, 0.01, 0.005, 0.001] + num_repeat = 10 + # contexts = [mx.cpu(), mx.gpu(0)] + contexts = [mx.cpu()] + for i in range(2): + for ctx in contexts: + for den in density: + bench_dot_forward(m, k[i], n[i], den, ctx, num_repeat) + + print("dot_backward\tdot(csr.T, dns)") + print('density(%)\tcontext\tn\tm\tk\tt_dense/t_sparse\tt_dense\tt_sparse' + '\tt_scipy_dense/t_scipy_sparse\tt_scipy_dense\tt_scipy_sparse') + for i in range(2): + for ctx in contexts: + for den in density: + bench_dot_backward(m, k[i], n[i], den, ctx, num_repeat) + + +if __name__ == "__main__": + test_dot_real(avazu) + test_dot_real(kdda) + test_dot_synthetic() diff --git a/benchmark/python/sparse/util.py b/benchmark/python/sparse/util.py new file mode 100644 index 000000000000..c20b33a86d65 --- /dev/null +++ 
b/benchmark/python/sparse/util.py @@ -0,0 +1,37 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import os +import random + +def estimate_density(DATA_PATH, feature_size): + """sample 10 times of a size of 1000 for estimating the density of the sparse dataset""" + if not os.path.exists(DATA_PATH): + raise Exception("Data is not there!") + density = [] + P = 0.01 + for _ in xrange(10): + num_non_zero = 0 + num_sample = 0 + with open(DATA_PATH) as f: + for line in f: + if (random.random() < P): + num_non_zero += len(line.split(" ")) - 1 + num_sample += 1 + density.append(num_non_zero * 1.0 / (feature_size * num_sample)) + return sum(density) / len(density) + diff --git a/cmake/ChooseBlas.cmake b/cmake/ChooseBlas.cmake new file mode 100644 index 000000000000..3a8723a5dd5e --- /dev/null +++ b/cmake/ChooseBlas.cmake @@ -0,0 +1,58 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set(BLAS "Open" CACHE STRING "Selected BLAS library") +set_property(CACHE BLAS PROPERTY STRINGS "Atlas;Open;MKL") + +if(USE_MKL_IF_AVAILABLE) + if(NOT MKL_FOUND) + find_package(MKL) + endif() + if(MKL_FOUND) + if(USE_MKLML_MKL) + set(BLAS "open") + else() + set(BLAS "MKL") + endif() + endif() +endif() + +if(BLAS STREQUAL "Atlas" OR BLAS STREQUAL "atlas") + find_package(Atlas REQUIRED) + include_directories(SYSTEM ${Atlas_INCLUDE_DIR}) + list(APPEND mshadow_LINKER_LIBS ${Atlas_LIBRARIES}) + add_definitions(-DMSHADOW_USE_CBLAS=1) + add_definitions(-DMSHADOW_USE_MKL=0) +elseif(BLAS STREQUAL "Open" OR BLAS STREQUAL "open") + find_package(OpenBLAS REQUIRED) + include_directories(SYSTEM ${OpenBLAS_INCLUDE_DIR}) + list(APPEND mshadow_LINKER_LIBS ${OpenBLAS_LIB}) + add_definitions(-DMSHADOW_USE_CBLAS=1) + add_definitions(-DMSHADOW_USE_MKL=0) +elseif(BLAS STREQUAL "MKL" OR BLAS STREQUAL "mkl") + find_package(MKL REQUIRED) + include_directories(SYSTEM ${MKL_INCLUDE_DIR}) + list(APPEND mshadow_LINKER_LIBS ${MKL_LIBRARIES}) + add_definitions(-DMSHADOW_USE_CBLAS=0) + add_definitions(-DMSHADOW_USE_MKL=1) +elseif(BLAS STREQUAL "apple") + find_package(Accelerate REQUIRED) + include_directories(SYSTEM ${Accelerate_INCLUDE_DIR}) + list(APPEND mshadow_LINKER_LIBS ${Accelerate_LIBRARIES}) + add_definitions(-DMSHADOW_USE_MKL=0) + add_definitions(-DMSHADOW_USE_CBLAS=1) +endif() \ No newline at end of file diff --git a/cmake/FirstClassLangCuda.cmake b/cmake/FirstClassLangCuda.cmake new file mode 100644 index 000000000000..73f075806243 --- /dev/null +++ 
b/cmake/FirstClassLangCuda.cmake @@ -0,0 +1,236 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +#this file is CUDA help function with CMAKE first class CUDA + +include(CheckCXXCompilerFlag) +check_cxx_compiler_flag("-std=c++11" SUPPORT_CXX11) + +################################################################################################ +# Short command for cuDNN detection. Believe it soon will be a part of CUDA toolkit distribution. +# That's why not FindcuDNN.cmake file, but just the macro +# Usage: +# detect_cuDNN() +function(detect_cuDNN) + set(CUDNN_ROOT "" CACHE PATH "CUDNN root folder") + + find_path(CUDNN_INCLUDE cudnn.h + PATHS ${CUDNN_ROOT} $ENV{CUDNN_ROOT} + DOC "Path to cuDNN include directory." 
) + + + find_library(CUDNN_LIBRARY NAMES libcudnn.so cudnn.lib # libcudnn_static.a + PATHS ${CUDNN_ROOT} $ENV{CUDNN_ROOT} ${CUDNN_INCLUDE} + DOC "Path to cuDNN library.") + + if(CUDNN_INCLUDE AND CUDNN_LIBRARY) + set(HAVE_CUDNN TRUE PARENT_SCOPE) + set(CUDNN_FOUND TRUE PARENT_SCOPE) + + mark_as_advanced(CUDNN_INCLUDE CUDNN_LIBRARY CUDNN_ROOT) + message(STATUS "Found cuDNN (include: ${CUDNN_INCLUDE}, library: ${CUDNN_LIBRARY})") + endif() +endfunction() + + + +################################################################################################ +# A function for automatic detection of GPUs installed (if autodetection is enabled) +# Usage: +# mshadow_detect_installed_gpus(out_variable) +function(mshadow_detect_installed_gpus out_variable) + if(NOT CUDA_gpu_detect_output) + set(__cufile ${PROJECT_BINARY_DIR}/detect_cuda_archs.cu) + + file(WRITE ${__cufile} "" + "#include <cstdio>\n" + "int main()\n" + "{\n" + " int count = 0;\n" + " if (cudaSuccess != cudaGetDeviceCount(&count)) return -1;\n" + " if (count == 0) return -1;\n" + " for (int device = 0; device < count; ++device)\n" + " {\n" + " cudaDeviceProp prop;\n" + " if (cudaSuccess == cudaGetDeviceProperties(&prop, device))\n" + " std::printf(\"%d.%d \", prop.major, prop.minor);\n" + " }\n" + " return 0;\n" + "}\n") + enable_language(CUDA) + + try_run(__nvcc_res __compile_result ${PROJECT_BINARY_DIR} ${__cufile} + COMPILE_OUTPUT_VARIABLE __compile_out + RUN_OUTPUT_VARIABLE __nvcc_out) + + if(__nvcc_res EQUAL 0 AND __compile_result) + # nvcc outputs text containing line breaks when building with MSVC. 
+ # The line below prevents CMake from inserting a variable with line + # breaks in the cache + string(REGEX MATCH "([1-9].[0-9])" __nvcc_out "${__nvcc_out}") + string(REPLACE "2.1" "2.1(2.0)" __nvcc_out "${__nvcc_out}") + set(CUDA_gpu_detect_output ${__nvcc_out}) + else() + message(WARNING "Running GPU detection script with nvcc failed: ${__nvcc_out} ${__compile_out}") + endif() + endif() + + if(NOT CUDA_gpu_detect_output) + message(WARNING "Automatic GPU detection failed. Building for all known architectures (${mshadow_known_gpu_archs}).") + set(${out_variable} ${mshadow_known_gpu_archs} PARENT_SCOPE) + else() + set(${out_variable} ${CUDA_gpu_detect_output} PARENT_SCOPE) + endif() +endfunction() + + +# This list will be used for CUDA_ARCH_NAME = All option +set(CUDA_KNOWN_GPU_ARCHITECTURES "Fermi" "Kepler" "Maxwell") + +# This list will be used for CUDA_ARCH_NAME = Common option (enabled by default) +set(CUDA_COMMON_GPU_ARCHITECTURES "3.0" "3.5" "5.0") + +if (CUDA_TOOLSET VERSION_GREATER "6.5") + list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Kepler+Tegra" "Kepler+Tesla" "Maxwell+Tegra") + list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "5.2") +endif () + +if (CUDA_TOOLSET VERSION_GREATER "7.5") + list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Pascal") + list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "6.0" "6.1" "6.1+PTX") +else() + list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "5.2+PTX") +endif () + +################################################################################################ +# Function for selecting GPU arch flags for nvcc based on CUDA_ARCH_NAME +# Usage: +# mshadow_select_nvcc_arch_flags(out_variable) +function(mshadow_select_nvcc_arch_flags out_variable) + + set(CUDA_ARCH_LIST "All" CACHE STRING "Select target NVIDIA GPU achitecture.") + set_property( CACHE CUDA_ARCH_LIST PROPERTY STRINGS "" "All" ${CUDA_KNOWN_GPU_ARCHITECTURES} ) + mark_as_advanced(CUDA_ARCH_NAME) + + + if("X${CUDA_ARCH_LIST}" STREQUAL "X" ) + set(CUDA_ARCH_LIST "All") + endif() + + 
set(cuda_arch_bin) + set(cuda_arch_ptx) + + if("${CUDA_ARCH_LIST}" STREQUAL "All") + set(CUDA_ARCH_LIST ${CUDA_KNOWN_GPU_ARCHITECTURES}) + elseif("${CUDA_ARCH_LIST}" STREQUAL "Common") + set(CUDA_ARCH_LIST ${CUDA_COMMON_GPU_ARCHITECTURES}) + elseif("${CUDA_ARCH_LIST}" STREQUAL "Auto") + mshadow_detect_installed_gpus(CUDA_ARCH_LIST) + message(STATUS "Autodetected CUDA architecture(s): ${CUDA_ARCH_LIST}") + endif() + + # Now process the list and look for names + string(REGEX REPLACE "[ \t]+" ";" CUDA_ARCH_LIST "${CUDA_ARCH_LIST}") + list(REMOVE_DUPLICATES CUDA_ARCH_LIST) + foreach(arch_name ${CUDA_ARCH_LIST}) + set(arch_bin) + set(arch_ptx) + set(add_ptx FALSE) + # Check to see if we are compiling PTX + if(arch_name MATCHES "(.*)\\+PTX$") + set(add_ptx TRUE) + set(arch_name ${CMAKE_MATCH_1}) + endif() + if(arch_name MATCHES "^([0-9]\\.[0-9](\\([0-9]\\.[0-9]\\))?)$") + set(arch_bin ${CMAKE_MATCH_1}) + set(arch_ptx ${arch_bin}) + else() + # Look for it in our list of known architectures + if(${arch_name} STREQUAL "Fermi") + if (CUDA_TOOLSET VERSION_LESS "8.0") + set(arch_bin 2.0 "2.1(2.0)") + endif() + elseif(${arch_name} STREQUAL "Kepler+Tegra") + set(arch_bin 3.2) + elseif(${arch_name} STREQUAL "Kepler+Tesla") + set(arch_bin 3.7) + elseif(${arch_name} STREQUAL "Kepler") + set(arch_bin 3.0 3.5) + set(arch_ptx 3.5) + elseif(${arch_name} STREQUAL "Maxwell+Tegra") + set(arch_bin 5.3) + elseif(${arch_name} STREQUAL "Maxwell") + set(arch_bin 5.0 5.2) + set(arch_ptx 5.2) + elseif(${arch_name} STREQUAL "Pascal") + set(arch_bin 6.0 6.1) + set(arch_ptx 6.1) + else() + message(SEND_ERROR "Unknown CUDA Architecture Name ${arch_name} in CUDA_SELECT_NVCC_ARCH_FLAGS") + endif() + endif() + list(APPEND cuda_arch_bin ${arch_bin}) + if(add_ptx) + if (NOT arch_ptx) + set(arch_ptx ${arch_bin}) + endif() + list(APPEND cuda_arch_ptx ${arch_ptx}) + endif() + endforeach() + + # remove dots and convert to lists + string(REGEX REPLACE "\\." 
"" cuda_arch_bin "${cuda_arch_bin}") + string(REGEX REPLACE "\\." "" cuda_arch_ptx "${cuda_arch_ptx}") + string(REGEX MATCHALL "[0-9()]+" cuda_arch_bin "${cuda_arch_bin}") + string(REGEX MATCHALL "[0-9]+" cuda_arch_ptx "${cuda_arch_ptx}") + + if(cuda_arch_bin) + list(REMOVE_DUPLICATES cuda_arch_bin) + endif() + if(cuda_arch_ptx) + list(REMOVE_DUPLICATES cuda_arch_ptx) + endif() + + message(STATUS "cuda arch bin: ${cuda_arch_bin}") + message(STATUS "cuda arch ptx: ${cuda_arch_ptx}") + set(nvcc_flags "") + set(nvcc_archs_readable "") + + # Tell NVCC to add binaries for the specified GPUs + foreach(arch ${cuda_arch_bin}) + if(arch MATCHES "([0-9]+)\\(([0-9]+)\\)") + # User explicitly specified ARCH for the concrete CODE + list(APPEND nvcc_flags -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1}) + list(APPEND nvcc_archs_readable sm_${CMAKE_MATCH_1}) + else() + # User didn't explicitly specify ARCH for the concrete CODE, we assume ARCH=CODE + list(APPEND nvcc_flags -gencode arch=compute_${arch},code=sm_${arch}) + list(APPEND nvcc_archs_readable sm_${arch}) + endif() + endforeach() + + # Tell NVCC to add PTX intermediate code for the specified architectures + foreach(arch ${cuda_arch_ptx}) + list(APPEND nvcc_flags -gencode arch=compute_${arch},code=compute_${arch}) + list(APPEND nvcc_archs_readable compute_${arch}) + endforeach() + + string(REPLACE ";" " " nvcc_archs_readable "${nvcc_archs_readable}") + set(${out_variable} ${nvcc_flags} PARENT_SCOPE) + set(${out_variable}_readable ${nvcc_archs_readable} PARENT_SCOPE) +endfunction() + diff --git a/cmake/Modules/FindGperftools.cmake b/cmake/Modules/FindGperftools.cmake new file mode 100644 index 000000000000..180f4785d396 --- /dev/null +++ b/cmake/Modules/FindGperftools.cmake @@ -0,0 +1,69 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Tries to find Gperftools. +# +# Usage of this module as follows: +# +# find_package(Gperftools) +# +# Variables used by this module, they can change the default behaviour and need +# to be set before calling find_package: +# +# Gperftools_ROOT_DIR Set this variable to the root installation of +# Gperftools if the module has problems finding +# the proper installation path. +# +# Variables defined by this module: +# +# GPERFTOOLS_FOUND System has Gperftools libs/headers +# GPERFTOOLS_LIBRARIES The Gperftools libraries (tcmalloc & profiler) +# GPERFTOOLS_INCLUDE_DIR The location of Gperftools headers + +find_library(GPERFTOOLS_TCMALLOC + NAMES tcmalloc + HINTS ${Gperftools_ROOT_DIR}/lib) + +find_library(GPERFTOOLS_PROFILER + NAMES profiler + HINTS ${Gperftools_ROOT_DIR}/lib) + +find_library(GPERFTOOLS_TCMALLOC_AND_PROFILER + NAMES tcmalloc_and_profiler + HINTS ${Gperftools_ROOT_DIR}/lib) + +find_path(GPERFTOOLS_INCLUDE_DIR + NAMES gperftools/heap-profiler.h + HINTS ${Gperftools_ROOT_DIR}/include) + +set(GPERFTOOLS_LIBRARIES ${GPERFTOOLS_TCMALLOC_AND_PROFILER}) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args( + Gperftools + DEFAULT_MSG + GPERFTOOLS_LIBRARIES + GPERFTOOLS_INCLUDE_DIR) + +mark_as_advanced( + Gperftools_ROOT_DIR + GPERFTOOLS_TCMALLOC + GPERFTOOLS_PROFILER + GPERFTOOLS_TCMALLOC_AND_PROFILER + GPERFTOOLS_LIBRARIES + GPERFTOOLS_INCLUDE_DIR) + diff --git 
a/cmake/Modules/FindJeMalloc.cmake b/cmake/Modules/FindJeMalloc.cmake index 57f47448f0a0..0ab1cec55f1f 100644 --- a/cmake/Modules/FindJeMalloc.cmake +++ b/cmake/Modules/FindJeMalloc.cmake @@ -1,28 +1,27 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at +# Distributed under the Boost Software License, Version 1.0. +# Boost Software License - Version 1.0 - August 17th, 2003 # -# http://www.apache.org/licenses/LICENSE-2.0 +# Permission is hereby granted, free of charge, to any person or organization +# obtaining a copy of the software and accompanying documentation covered by +# this license (the "Software") to use, reproduce, display, distribute, +# execute, and transmit the Software, and to prepare derivative works of the +# Software, and to permit third-parties to whom the Software is furnished to +# do so, all subject to the following: # -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- - -# Copyright (c) 2014 Thomas Heller -# Copyright (c) 2007-2012 Hartmut Kaiser -# Copyright (c) 2010-2011 Matt Anderson -# Copyright (c) 2011 Bryce Lelbach +# The copyright notices in the Software and this entire statement, including +# the above license grant, this restriction and the following disclaimer, +# must be included in all copies of the Software, in whole or in part, and +# all derivative works of the Software, unless such copies or derivative +# works are solely in the form of machine-executable object code generated by +# a source language processor. # -# Distributed under the Boost Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +# SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +# FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. find_package(PkgConfig) pkg_check_modules(PC_JEMALLOC QUIET jemalloc) diff --git a/cmake/Modules/FindOpenBLAS.cmake b/cmake/Modules/FindOpenBLAS.cmake index 7c5272b7f779..a3a79caae461 100644 --- a/cmake/Modules/FindOpenBLAS.cmake +++ b/cmake/Modules/FindOpenBLAS.cmake @@ -15,9 +15,6 @@ # specific language governing permissions and limitations # under the License. 
-if(MKL_FOUND) - message(ERROR " OpenBLAS is not required since MKL is enabled") -endif() file(TO_CMAKE_PATH "$ENV{OpenBLAS_HOME}" OpenBLAS_HOME) file(TO_CMAKE_PATH "$ENV{OpenBLAS}" OpenBLAS_DIR) diff --git a/cpp-package/README.md b/cpp-package/README.md index dcfcbc81f3a7..cc656352c170 100644 --- a/cpp-package/README.md +++ b/cpp-package/README.md @@ -1,8 +1,12 @@ -# MxNet C++ Package +# MXNet C++ Package + -The examples dir containers examples for you to get started. -The lib dir should contain the compiled mxnet library. -Windows dir contains Visual C++ solution files and project files. +To build the package, please refer to . + +A basic tutorial can be found at . + +The example directory contains examples for you to get started. diff --git a/cpp-package/example/CMakeLists.txt b/cpp-package/example/CMakeLists.txt index 7083dfd014e9..b4cea68fbd05 100644 --- a/cpp-package/example/CMakeLists.txt +++ b/cpp-package/example/CMakeLists.txt @@ -17,11 +17,13 @@ file(GLOB_RECURSE CPP_PACKAGE_HEADERS "${CPP_PACKAGE_INCLUDE_DIR}/*.hpp" ) -add_custom_target( - cpp_package_deploy_library ALL - DEPENDS mxnet - COMMAND ${CMAKE_COMMAND} -E copy $ $ -) +if (MSVC) + add_custom_target( + cpp_package_deploy_library ALL + DEPENDS mxnet + COMMAND ${CMAKE_COMMAND} -E copy $ $ + ) +endif() include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../include) diff --git a/cpp-package/example/alexnet.cpp b/cpp-package/example/alexnet.cpp index 4194b5bae905..dd5d2b4b06d6 100644 --- a/cpp-package/example/alexnet.cpp +++ b/cpp-package/example/alexnet.cpp @@ -23,8 +23,7 @@ #include #include #include "mxnet-cpp/MxNetCpp.h" -// Allow IDE to parse the types -#include "../include/mxnet-cpp/op.h" + using namespace std; using namespace mxnet::cpp; diff --git a/cpp-package/example/charRNN.cpp b/cpp-package/example/charRNN.cpp index f5fff853cbad..218d11efc9c8 100644 --- a/cpp-package/example/charRNN.cpp +++ b/cpp-package/example/charRNN.cpp @@ -43,8 +43,6 @@ #include #include "mxnet-cpp/MxNetCpp.h" -// Allow 
IDE to parse the types -#include "../include/mxnet-cpp/op.h" using namespace std; using namespace mxnet::cpp; diff --git a/cpp-package/example/feature_extract/README.md b/cpp-package/example/feature_extract/README.md new file mode 100644 index 000000000000..4367a0c2efe3 --- /dev/null +++ b/cpp-package/example/feature_extract/README.md @@ -0,0 +1,8 @@ +This example shows how to extract features with a pretrained model. + +You can first get a pretrained model from , +then prepare 2 pictures 1.jpg and 2.jpg to extract by executing `run.sh`. + +Note: +1. The filename of network parameters may vary, line 67 in `feature_extract.cpp` should be updated accordingly. +2. As the build system has changed a lot, to build this example, you need to put the compiled library `libmxnet.so` in `../lib/linux`. diff --git a/cpp-package/example/googlenet.cpp b/cpp-package/example/googlenet.cpp index ac0585e81a70..fe5dea6a1f58 100644 --- a/cpp-package/example/googlenet.cpp +++ b/cpp-package/example/googlenet.cpp @@ -22,10 +22,8 @@ #include #include #include - #include "mxnet-cpp/MxNetCpp.h" -// Allow IDE to parse the types -#include "../include/mxnet-cpp/op.h" + using namespace mxnet::cpp; @@ -159,8 +157,8 @@ int main(int argc, char const *argv[]) { train_iter.Reset(); while (train_iter.Next()) { auto data_batch = train_iter.GetDataBatch(); - args_map["data"] = data_batch.data.Copy(Context::gpu()); - args_map["data_label"] = data_batch.label.Copy(Context::gpu()); + data_batch.data.CopyTo(&args_map["data"]); + data_batch.label.CopyTo(&args_map["data_label"]); NDArray::WaitAll(); exec->Forward(true); exec->Backward(); @@ -174,8 +172,8 @@ int main(int argc, char const *argv[]) { val_iter.Reset(); while (val_iter.Next()) { auto data_batch = val_iter.GetDataBatch(); - args_map["data"] = data_batch.data.Copy(Context::gpu()); - args_map["data_label"] = data_batch.label.Copy(Context::gpu()); + data_batch.data.CopyTo(&args_map["data"]); + data_batch.label.CopyTo(&args_map["data_label"]); 
NDArray::WaitAll(); exec->Forward(false); NDArray::WaitAll(); diff --git a/cpp-package/example/inception_bn.cpp b/cpp-package/example/inception_bn.cpp index de21aadea9b5..e6f47904e0eb 100644 --- a/cpp-package/example/inception_bn.cpp +++ b/cpp-package/example/inception_bn.cpp @@ -19,13 +19,11 @@ /*! */ -#include #include #include #include #include "mxnet-cpp/MxNetCpp.h" -// Allow IDE to parse the types -#include "../include/mxnet-cpp/op.h" + using namespace mxnet::cpp; diff --git a/cpp-package/example/lenet.cpp b/cpp-package/example/lenet.cpp index 05cc4517fe1e..4c5a1f1165c1 100644 --- a/cpp-package/example/lenet.cpp +++ b/cpp-package/example/lenet.cpp @@ -19,14 +19,12 @@ /*! */ -#include #include #include #include #include #include "mxnet-cpp/MxNetCpp.h" -// Allow IDE to parse the types -#include "../include/mxnet-cpp/op.h" + using namespace std; using namespace mxnet::cpp; diff --git a/cpp-package/example/lenet_with_mxdataiter.cpp b/cpp-package/example/lenet_with_mxdataiter.cpp index 077f55622561..04f5cbca3a9d 100644 --- a/cpp-package/example/lenet_with_mxdataiter.cpp +++ b/cpp-package/example/lenet_with_mxdataiter.cpp @@ -19,14 +19,12 @@ /*! 
*/ -#include -#include #include #include #include +#include #include "mxnet-cpp/MxNetCpp.h" -// Allow IDE to parse the types -#include "../include/mxnet-cpp/op.h" + using namespace std; using namespace mxnet::cpp; @@ -89,15 +87,15 @@ int main(int argc, char const *argv[]) { args_map["fc2_b"] = 0; auto train_iter = MXDataIter("MNISTIter") - .SetParam("image", "./train-images-idx3-ubyte") - .SetParam("label", "./train-labels-idx1-ubyte") + .SetParam("image", "./mnist_data/train-images-idx3-ubyte") + .SetParam("label", "./mnist_data/train-labels-idx1-ubyte") .SetParam("batch_size", batch_size) .SetParam("shuffle", 1) .SetParam("flat", 0) .CreateDataIter(); auto val_iter = MXDataIter("MNISTIter") - .SetParam("image", "./t10k-images-idx3-ubyte") - .SetParam("label", "./t10k-labels-idx1-ubyte") + .SetParam("image", "./mnist_data/t10k-images-idx3-ubyte") + .SetParam("label", "./mnist_data/t10k-labels-idx1-ubyte") .CreateDataIter(); Optimizer* opt = OptimizerRegistry::Find("ccsgd"); @@ -111,35 +109,62 @@ int main(int argc, char const *argv[]) { auto *exec = lenet.SimpleBind(Context::gpu(), args_map); auto arg_names = lenet.ListArguments(); + // Create metrics + Accuracy train_acc, val_acc; + for (int iter = 0; iter < max_epoch; ++iter) { - LG << "Epoch: " << iter; - train_iter.Reset(); - while (train_iter.Next()) { + int samples = 0; + train_iter.Reset(); + train_acc.Reset(); + + auto tic = chrono::system_clock::now(); + + while (train_iter.Next()) { + samples += batch_size; auto data_batch = train_iter.GetDataBatch(); - args_map["data"] = data_batch.data.Copy(Context::gpu()); - args_map["data_label"] = data_batch.label.Copy(Context::gpu()); + + data_batch.data.CopyTo(&args_map["data"]); + data_batch.label.CopyTo(&args_map["data_label"]); NDArray::WaitAll(); + + // Compute gradients exec->Forward(true); exec->Backward(); + // Update parameters for (size_t i = 0; i < arg_names.size(); ++i) { if (arg_names[i] == "data" || arg_names[i] == "data_label") continue; 
opt->Update(i, exec->arg_arrays[i], exec->grad_arrays[i]); } + + // Update metric + train_acc.Update(data_batch.label, exec->outputs[0]); } + // one epoch of training is finished + auto toc = chrono::system_clock::now(); + float duration = chrono::duration_cast(toc - tic).count() / 1000.0; + LG << "Epoch[" << iter << "] " << samples / duration \ + << " samples/sec " << "Train-Accuracy=" << train_acc.Get();; + + val_iter.Reset(); + val_acc.Reset(); + Accuracy acu; val_iter.Reset(); while (val_iter.Next()) { auto data_batch = val_iter.GetDataBatch(); - args_map["data"] = data_batch.data.Copy(Context::gpu()); - args_map["data_label"] = data_batch.label.Copy(Context::gpu()); + data_batch.data.CopyTo(&args_map["data"]); + data_batch.label.CopyTo(&args_map["data_label"]); NDArray::WaitAll(); + + // Only forward pass is enough as no gradient is needed when evaluating exec->Forward(false); NDArray::WaitAll(); acu.Update(data_batch.label, exec->outputs[0]); + val_acc.Update(data_batch.label, exec->outputs[0]); } - LG << "Accuracy: " << acu.Get(); + LG << "Epoch[" << iter << "] Val-Accuracy=" << val_acc.Get(); } delete exec; diff --git a/cpp-package/example/mlp.cpp b/cpp-package/example/mlp.cpp index c9c4ff245180..b40328da6e9a 100644 --- a/cpp-package/example/mlp.cpp +++ b/cpp-package/example/mlp.cpp @@ -24,8 +24,7 @@ #include #include #include "mxnet-cpp/MxNetCpp.h" -// Allow IDE to parse the types -#include "../include/mxnet-cpp/op.h" + using namespace std; using namespace mxnet::cpp; diff --git a/cpp-package/example/mlp_cpu.cpp b/cpp-package/example/mlp_cpu.cpp index 748c32e8c274..051bad1bd26a 100644 --- a/cpp-package/example/mlp_cpu.cpp +++ b/cpp-package/example/mlp_cpu.cpp @@ -106,8 +106,8 @@ int main(int argc, char** argv) { samples += batch_size; auto data_batch = train_iter.GetDataBatch(); // Set data and label - args["X"] = data_batch.data; - args["label"] = data_batch.label; + data_batch.data.CopyTo(&args["X"]); + data_batch.label.CopyTo(&args["label"]); // Compute 
gradients exec->Forward(true); @@ -124,8 +124,8 @@ int main(int argc, char** argv) { val_iter.Reset(); while (val_iter.Next()) { auto data_batch = val_iter.GetDataBatch(); - args["X"] = data_batch.data; - args["label"] = data_batch.label; + data_batch.data.CopyTo(&args["X"]); + data_batch.label.CopyTo(&args["label"]); // Forward pass is enough as no gradient is needed when evaluating exec->Forward(false); acc.Update(data_batch.label, exec->outputs[0]); diff --git a/cpp-package/example/resnet.cpp b/cpp-package/example/resnet.cpp index ca5643de9d81..03b3d7217648 100644 --- a/cpp-package/example/resnet.cpp +++ b/cpp-package/example/resnet.cpp @@ -19,13 +19,11 @@ /*! */ -#include #include #include #include #include "mxnet-cpp/MxNetCpp.h" -// Allow IDE to parse the types -#include "../include/mxnet-cpp/op.h" + using namespace mxnet::cpp; diff --git a/cpp-package/include/mxnet-cpp/MxNetCpp.h b/cpp-package/include/mxnet-cpp/MxNetCpp.h index 882bbead51e5..7ac039dd8816 100644 --- a/cpp-package/include/mxnet-cpp/MxNetCpp.h +++ b/cpp-package/include/mxnet-cpp/MxNetCpp.h @@ -18,6 +18,7 @@ */ /*! + * Copyright (c) 2016 by Contributors * \file MxNetCpp.h * \brief meta include file for mxnet.cpp * \author Chuntao Hong, Zhang Chen diff --git a/cpp-package/include/mxnet-cpp/base.h b/cpp-package/include/mxnet-cpp/base.h index 19375c0f81e8..d0f1bea15f00 100644 --- a/cpp-package/include/mxnet-cpp/base.h +++ b/cpp-package/include/mxnet-cpp/base.h @@ -18,6 +18,7 @@ */ /*! +* Copyright (c) 2016 by Contributors * \file base.h * \brief base definitions for mxnetcpp * \author Chuntao Hong, Zhang Chen diff --git a/cpp-package/include/mxnet-cpp/executor.h b/cpp-package/include/mxnet-cpp/executor.h index 7e45ef56ab95..4cb28819de02 100644 --- a/cpp-package/include/mxnet-cpp/executor.h +++ b/cpp-package/include/mxnet-cpp/executor.h @@ -18,6 +18,7 @@ */ /*! 
+* Copyright (c) 2016 by Contributors * \file executor.h * \brief executor definition * \author Chuntao Hong, Zhang Chen diff --git a/cpp-package/include/mxnet-cpp/initializer.h b/cpp-package/include/mxnet-cpp/initializer.h index e5bfa4da8eed..61e95469b76d 100644 --- a/cpp-package/include/mxnet-cpp/initializer.h +++ b/cpp-package/include/mxnet-cpp/initializer.h @@ -18,6 +18,7 @@ */ /*! + * Copyright (c) 2016 by Contributors * \file initializer.h * \brief random initializer * \author Zhang Chen diff --git a/cpp-package/include/mxnet-cpp/io.h b/cpp-package/include/mxnet-cpp/io.h index 7281416ae36a..7099d7d46fee 100644 --- a/cpp-package/include/mxnet-cpp/io.h +++ b/cpp-package/include/mxnet-cpp/io.h @@ -18,6 +18,7 @@ */ /*! +* Copyright (c) 2016 by Contributors * \file operator.h * \brief definition of io, such as DataIter * \author Zhang Chen diff --git a/cpp-package/include/mxnet-cpp/kvstore.h b/cpp-package/include/mxnet-cpp/kvstore.h index 9c3c81f37ff7..d5aa1509a8f0 100644 --- a/cpp-package/include/mxnet-cpp/kvstore.h +++ b/cpp-package/include/mxnet-cpp/kvstore.h @@ -18,6 +18,7 @@ */ /*! +* Copyright (c) 2016 by Contributors * \file kvstore.h * \brief definition of kvstore * \author Chuntao Hong diff --git a/cpp-package/include/mxnet-cpp/lr_scheduler.h b/cpp-package/include/mxnet-cpp/lr_scheduler.h index b9381a830a88..cffd1c7576e5 100644 --- a/cpp-package/include/mxnet-cpp/lr_scheduler.h +++ b/cpp-package/include/mxnet-cpp/lr_scheduler.h @@ -18,6 +18,7 @@ */ /*! +* Copyright (c) 2017 by Contributors * \file lr_scheduler.h * \brief Scheduling learning rate */ diff --git a/cpp-package/include/mxnet-cpp/metric.h b/cpp-package/include/mxnet-cpp/metric.h index 6dbb197dae49..d015d8b4acc9 100644 --- a/cpp-package/include/mxnet-cpp/metric.h +++ b/cpp-package/include/mxnet-cpp/metric.h @@ -18,6 +18,7 @@ */ /*! 
+* Copyright (c) 2016 by Contributors * \file base.h * \brief metrics defined * \author Zhang Chen diff --git a/cpp-package/include/mxnet-cpp/model.h b/cpp-package/include/mxnet-cpp/model.h index c8af6a476a52..b3a0a9dbef6e 100644 --- a/cpp-package/include/mxnet-cpp/model.h +++ b/cpp-package/include/mxnet-cpp/model.h @@ -18,6 +18,7 @@ */ /*! +* Copyright (c) 2016 by Contributors * \file model.h * \brief MXNET.cpp model module * \author Zhang Chen diff --git a/cpp-package/include/mxnet-cpp/monitor.h b/cpp-package/include/mxnet-cpp/monitor.h index 33ef4855c1a9..c1494d0bd0a6 100644 --- a/cpp-package/include/mxnet-cpp/monitor.h +++ b/cpp-package/include/mxnet-cpp/monitor.h @@ -18,6 +18,7 @@ */ /*! +* Copyright (c) 2017 by Contributors * \file monitor.h * \brief monitor definition * \author Xin Li diff --git a/cpp-package/include/mxnet-cpp/ndarray.h b/cpp-package/include/mxnet-cpp/ndarray.h index 9e196d0730a8..082c06981cf9 100644 --- a/cpp-package/include/mxnet-cpp/ndarray.h +++ b/cpp-package/include/mxnet-cpp/ndarray.h @@ -18,6 +18,7 @@ */ /*! 
+* Copyright (c) 2016 by Contributors * \file ndarray.h * \brief definition of ndarray * \author Chuntao Hong, Zhang Chen diff --git a/cpp-package/include/mxnet-cpp/ndarray.hpp b/cpp-package/include/mxnet-cpp/ndarray.hpp index 5ed04a547b85..3c3b85d37326 100644 --- a/cpp-package/include/mxnet-cpp/ndarray.hpp +++ b/cpp-package/include/mxnet-cpp/ndarray.hpp @@ -33,6 +33,7 @@ #include #include "dmlc/logging.h" #include "mxnet-cpp/ndarray.h" +#include "mxnet-cpp/operator.h" namespace mxnet { namespace cpp { @@ -239,10 +240,10 @@ inline void NDArray::WaitToWrite() { } inline void NDArray::WaitAll() { CHECK_EQ(MXNDArrayWaitAll(), 0); } inline void NDArray::SampleGaussian(mx_float mu, mx_float sigma, NDArray *out) { - Operator("_sample_normal")(mu, sigma).Invoke(*out); + Operator("_random_normal")(mu, sigma).Invoke(*out); } inline void NDArray::SampleUniform(mx_float begin, mx_float end, NDArray *out) { - Operator("_sample_uniform")(begin, end).Invoke(*out); + Operator("_random_uniform")(begin, end).Invoke(*out); } inline void NDArray::Load(const std::string &file_name, std::vector *array_list, @@ -359,7 +360,6 @@ inline int NDArray::GetDType() const { inline const mx_float *NDArray::GetData() const { void *ret; - CHECK_NE(GetContext().GetDeviceType(), DeviceType::kGPU); MXNDArrayGetData(blob_ptr_->handle_, &ret); if (GetDType() != 0) { return NULL; diff --git a/cpp-package/include/mxnet-cpp/op_map.h b/cpp-package/include/mxnet-cpp/op_map.h index b54cc0ae2c01..17746d1fa596 100644 --- a/cpp-package/include/mxnet-cpp/op_map.h +++ b/cpp-package/include/mxnet-cpp/op_map.h @@ -18,6 +18,7 @@ */ /*! +* Copyright (c) 2016 by Contributors * \file op_map.h * \brief definition of OpMap * \author Chuntao Hong diff --git a/cpp-package/include/mxnet-cpp/op_suppl.h b/cpp-package/include/mxnet-cpp/op_suppl.h index 52cdae772a68..4f3011c17caa 100644 --- a/cpp-package/include/mxnet-cpp/op_suppl.h +++ b/cpp-package/include/mxnet-cpp/op_suppl.h @@ -18,6 +18,7 @@ */ /*! 
+* Copyright (c) 2016 by Contributors * \file op_suppl.h * \brief A supplement and amendment of the operators from op.h * \author Zhang Chen, zhubuntu, Xin Li diff --git a/cpp-package/include/mxnet-cpp/op_util.h b/cpp-package/include/mxnet-cpp/op_util.h index 20e06a851814..b2b442fd8a88 100644 --- a/cpp-package/include/mxnet-cpp/op_util.h +++ b/cpp-package/include/mxnet-cpp/op_util.h @@ -18,6 +18,7 @@ */ /*! +* Copyright (c) 2017 by Contributors * \file op_util.h * \brief operator helper functions * \author Chris Olivier diff --git a/cpp-package/include/mxnet-cpp/operator.h b/cpp-package/include/mxnet-cpp/operator.h index 02bd21ebe8c9..4d4bedac8fec 100644 --- a/cpp-package/include/mxnet-cpp/operator.h +++ b/cpp-package/include/mxnet-cpp/operator.h @@ -18,6 +18,7 @@ */ /*! +* Copyright (c) 2016 by Contributors * \file operator.h * \brief definition of operator * \author Chuntao Hong, Zhang Chen diff --git a/cpp-package/include/mxnet-cpp/optimizer.h b/cpp-package/include/mxnet-cpp/optimizer.h index e57da5d95ceb..4aebb55c50d1 100644 --- a/cpp-package/include/mxnet-cpp/optimizer.h +++ b/cpp-package/include/mxnet-cpp/optimizer.h @@ -18,6 +18,7 @@ */ /*! +* Copyright (c) 2016 by Contributors * \file optimizer.h * \brief definition of optimizer * \author Chuntao Hong, Zhang Chen diff --git a/cpp-package/include/mxnet-cpp/shape.h b/cpp-package/include/mxnet-cpp/shape.h index 2793e436c072..01ee47636351 100644 --- a/cpp-package/include/mxnet-cpp/shape.h +++ b/cpp-package/include/mxnet-cpp/shape.h @@ -18,6 +18,7 @@ */ /*! +* Copyright (c) 2016 by Contributors * \file shape.h * \brief definition of shape * \author Chuntao Hong, Zhang Chen diff --git a/cpp-package/include/mxnet-cpp/symbol.h b/cpp-package/include/mxnet-cpp/symbol.h index 888aebd6f3ad..127ef156eb62 100644 --- a/cpp-package/include/mxnet-cpp/symbol.h +++ b/cpp-package/include/mxnet-cpp/symbol.h @@ -18,6 +18,7 @@ */ /*! 
+* Copyright (c) 2016 by Contributors * \file symbol.h * \brief definition of symbol * \author Chuntao Hong, Zhang Chen diff --git a/cpp-package/include/mxnet-cpp/symbol.hpp b/cpp-package/include/mxnet-cpp/symbol.hpp index ee1a11e26a40..11590fad6041 100644 --- a/cpp-package/include/mxnet-cpp/symbol.hpp +++ b/cpp-package/include/mxnet-cpp/symbol.hpp @@ -103,6 +103,7 @@ inline Symbol Symbol::Load(const std::string &file_name) { return Symbol(handle); } inline Symbol Symbol::LoadJSON(const std::string &json_str) { + op_map(); SymbolHandle handle; CHECK_EQ(MXSymbolCreateFromJSON(json_str.c_str(), &(handle)), 0); return Symbol(handle); diff --git a/cpp-package/scripts/OpWrapperGenerator.py b/cpp-package/scripts/OpWrapperGenerator.py index 83495febcc63..ac957730d689 100644 --- a/cpp-package/scripts/OpWrapperGenerator.py +++ b/cpp-package/scripts/OpWrapperGenerator.py @@ -124,12 +124,15 @@ def __init__(self, opName = '', argName = '', typeString = '', descString = ''): self.defaultString = self.enum.GetDefaultValueString(self.defaultString) elif self.defaultString == 'None': self.defaultString = self.type + '()' - elif self.defaultString == 'False': - self.defaultString = 'false' - elif self.defaultString == 'True': - self.defaultString = 'true' + elif self.type == "bool": + if self.defaultString == "1" or self.defaultString == "True": + self.defaultString = "true" + else: + self.defaultString = "false" elif self.defaultString[0] == '(': self.defaultString = 'Shape' + self.defaultString + elif self.defaultString[0] == '[': + self.defaultString = 'Shape(' + self.defaultString[1:-1] + ")" elif self.type == 'dmlc::optional': self.defaultString = self.type + '(' + self.defaultString + ')' elif typeString.startswith('caffe-layer-parameter'): diff --git a/dmlc-core b/dmlc-core index 71bfbd3a9460..87b7ffa59eb7 160000 --- a/dmlc-core +++ b/dmlc-core @@ -1 +1 @@ -Subproject commit 71bfbd3a946075cea66ca9e19bad86dd33c19b46 +Subproject commit 87b7ffa59eb78f753073ac56f5f60e46d930b93c 
diff --git a/docker/install/perl.sh b/docker/install/perl.sh index a981746bc18d..af49952f97d6 100755 --- a/docker/install/perl.sh +++ b/docker/install/perl.sh @@ -19,4 +19,4 @@ # install libraries for mxnet's perl package on ubuntu apt-get update && apt-get install -y libmouse-perl pdl cpanminus swig libgraphviz-perl -cpanm -q Function::Parameters +cpanm -q Function::Parameters Hash::Ordered diff --git a/docker/install/scala.sh b/docker/install/scala.sh index bb0bb9c900d4..b1bfe28074f0 100755 --- a/docker/install/scala.sh +++ b/docker/install/scala.sh @@ -19,7 +19,15 @@ # install libraries for mxnet's scala package on ubuntu -apt-get install -y maven default-jdk + +apt-get install -y software-properties-common +add-apt-repository -y ppa:webupd8team/java +apt-get update +echo "oracle-java8-installer shared/accepted-oracle-license-v1-1 select true" | debconf-set-selections +apt-get install -y oracle-java8-installer +apt-get install -y oracle-java8-set-default + +apt-get install -y maven wget http://downloads.lightbend.com/scala/2.11.8/scala-2.11.8.deb dpkg -i scala-2.11.8.deb diff --git a/docker/run.sh b/docker/run.sh old mode 100644 new mode 100755 diff --git a/docker_multiarch/.gitignore b/docker_multiarch/.gitignore new file mode 100644 index 000000000000..2a07fbf7c51e --- /dev/null +++ b/docker_multiarch/.gitignore @@ -0,0 +1,2 @@ +mxnet/ +build/ diff --git a/docker_multiarch/Dockerfile.build.android.arm64 b/docker_multiarch/Dockerfile.build.android.arm64 new file mode 100644 index 000000000000..995e718bfb5f --- /dev/null +++ b/docker_multiarch/Dockerfile.build.android.arm64 @@ -0,0 +1,77 @@ +# -*- mode: dockerfile -*- +FROM dockcross/base:latest +MAINTAINER Pedro Larroy "pllarroy@amazon.com" + +# The cross-compiling emulator +RUN apt-get update && apt-get install -y \ + qemu-user \ + qemu-user-static \ + unzip + +ENV CROSS_TRIPLE=aarch64-linux-android +ENV CROSS_ROOT=/usr/${CROSS_TRIPLE} +ENV AS=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-as \ + 
AR=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-ar \ + CC=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-gcc \ + CPP=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-cpp \ + CXX=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-g++ \ + LD=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-ld + +ENV ANDROID_NDK_REVISION 15c +RUN mkdir -p /build && \ + cd /build && \ + curl -O https://dl.google.com/android/repository/android-ndk-r${ANDROID_NDK_REVISION}-linux-x86_64.zip && \ + unzip ./android-ndk-r${ANDROID_NDK_REVISION}-linux-x86_64.zip && \ + cd android-ndk-r${ANDROID_NDK_REVISION} && \ + ./build/tools/make_standalone_toolchain.py \ + --stl=libc++ \ + --arch arm64 \ + --api 21 \ + --install-dir=${CROSS_ROOT} && \ + cd / && \ + rm -rf /build && \ + find ${CROSS_ROOT} -exec chmod a+r '{}' \; && \ + find ${CROSS_ROOT} -executable -exec chmod a+x '{}' \; + + +ENV DEFAULT_DOCKCROSS_IMAGE dockcross/android-arm + +# COPY Toolchain.cmake ${CROSS_ROOT}/ +# ENV CMAKE_TOOLCHAIN_FILE ${CROSS_ROOT}/Toolchain.cmake + +# Build-time metadata as defined at http://label-schema.org +ARG BUILD_DATE +ARG IMAGE +ARG VCS_REF +ARG VCS_URL +LABEL org.label-schema.build-date=$BUILD_DATE \ + org.label-schema.name=$IMAGE \ + org.label-schema.vcs-ref=$VCS_REF \ + org.label-schema.vcs-url=$VCS_URL \ + org.label-schema.schema-version="1.0" + +ENV ARCH aarch64 + +# Build OpenBLAS +# https://github.com/xianyi/OpenBLAS/wiki/How-to-build-OpenBLAS-for-Android +RUN git clone https://github.com/xianyi/OpenBLAS.git && \ + cd OpenBLAS && \ + make -j$(nproc) TARGET=ARMV8 ARM_SOFTFP_ABI=1 HOSTCC=gcc NOFORTRAN=1 libs + +ENV OPENBLAS_ROOT /work/OpenBLAS +ENV LIBRARY_PATH /work/OpenBLAS/lib/:/work/OpenBLAS/:$LIBRARY_PATH +ENV CPLUS_INCLUDE_PATH /work/OpenBLAS/include/:/work/OpenBLAS/:$CPLUS_INCLUDE_PATH +WORKDIR /work + +ENV CC=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-clang +ENV CXX=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-clang++ +ENV BUILD_OPTS "USE_BLAS=openblas USE_SSE=0 DMLC_LOG_STACK_TRACE=0 USE_OPENCV=0 USE_LAPACK=0" + +# Build MXNet +ADD mxnet mxnet +ADD arm.crosscompile.android.mk 
/work/mxnet/make/config.mk +RUN cd mxnet && \ + make -j$(nproc) $BUILD_OPTS + +WORKDIR /work/build/ +RUN cp /work/mxnet/lib/* . diff --git a/docker_multiarch/Dockerfile.build.android.armv7 b/docker_multiarch/Dockerfile.build.android.armv7 new file mode 100644 index 000000000000..12d53a412223 --- /dev/null +++ b/docker_multiarch/Dockerfile.build.android.armv7 @@ -0,0 +1,78 @@ +# -*- mode: dockerfile -*- +FROM dockcross/base:latest +MAINTAINER Pedro Larroy "pllarroy@amazon.com" + +# The cross-compiling emulator +RUN apt-get update && apt-get install -y \ + qemu-user \ + qemu-user-static \ + unzip + +ENV CROSS_TRIPLE=arm-linux-androideabi +ENV CROSS_ROOT=/usr/${CROSS_TRIPLE} +ENV AS=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-as \ + AR=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-ar \ + CC=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-gcc \ + CPP=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-cpp \ + CXX=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-g++ \ + LD=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-ld + +ENV ANDROID_NDK_REVISION 15c +RUN mkdir -p /build && \ + cd /build && \ + curl -O https://dl.google.com/android/repository/android-ndk-r${ANDROID_NDK_REVISION}-linux-x86_64.zip && \ + unzip ./android-ndk-r${ANDROID_NDK_REVISION}-linux-x86_64.zip && \ + cd android-ndk-r${ANDROID_NDK_REVISION} && \ + ./build/tools/make_standalone_toolchain.py \ + --stl=libc++ \ + --arch arm \ + --api 16 \ + --install-dir=${CROSS_ROOT} && \ + cd / && \ + rm -rf /build && \ + find ${CROSS_ROOT} -exec chmod a+r '{}' \; && \ + find ${CROSS_ROOT} -executable -exec chmod a+x '{}' \; + + +ENV DEFAULT_DOCKCROSS_IMAGE dockcross/android-arm + +# COPY Toolchain.cmake ${CROSS_ROOT}/ +# ENV CMAKE_TOOLCHAIN_FILE ${CROSS_ROOT}/Toolchain.cmake + +# Build-time metadata as defined at http://label-schema.org +ARG BUILD_DATE +ARG IMAGE +ARG VCS_REF +ARG VCS_URL +LABEL org.label-schema.build-date=$BUILD_DATE \ + org.label-schema.name=$IMAGE \ + org.label-schema.vcs-ref=$VCS_REF \ + org.label-schema.vcs-url=$VCS_URL \ + org.label-schema.schema-version="1.0" + +ENV CC 
/usr/arm-linux-androideabi/bin/arm-linux-androideabi-gcc +ENV CXX /usr/arm-linux-androideabi/bin/arm-linux-androideabi-g++ + +# Build OpenBLAS +# https://github.com/xianyi/OpenBLAS/wiki/How-to-build-OpenBLAS-for-Android +RUN git clone https://github.com/xianyi/OpenBLAS.git && \ + cd OpenBLAS && \ + make -j$(nproc) TARGET=ARMV7 ARM_SOFTFP_ABI=1 HOSTCC=gcc NOFORTRAN=1 libs + +ENV OPENBLAS_ROOT /work/OpenBLAS +ENV LIBRARY_PATH /work/OpenBLAS/lib/:/work/OpenBLAS/:$LIBRARY_PATH +ENV CPLUS_INCLUDE_PATH /work/OpenBLAS/include/:/work/OpenBLAS/:$CPLUS_INCLUDE_PATH +WORKDIR /work + +ENV CC /usr/arm-linux-androideabi/bin/arm-linux-androideabi-clang +ENV CXX /usr/arm-linux-androideabi/bin/arm-linux-androideabi-clang++ +ENV BUILD_OPTS "USE_BLAS=openblas USE_SSE=0 DMLC_LOG_STACK_TRACE=0 USE_OPENCV=0 USE_LAPACK=0" + +# Build MXNet +ADD mxnet mxnet +ADD arm.crosscompile.android.mk /work/mxnet/make/config.mk +RUN cd mxnet && \ + make -j$(nproc) $BUILD_OPTS + +WORKDIR /work/build/ +RUN cp /work/mxnet/lib/* . 
diff --git a/docker_multiarch/Dockerfile.build.arm64 b/docker_multiarch/Dockerfile.build.arm64 new file mode 100644 index 000000000000..33f7b3f45e9b --- /dev/null +++ b/docker_multiarch/Dockerfile.build.arm64 @@ -0,0 +1,37 @@ +# -*- mode: dockerfile -*- +# dockerfile to build libmxnet.so for armv7 +FROM dockcross/linux-arm64 + +ENV ARCH aarch64 +ENV BUILD_OPTS "USE_BLAS=openblas USE_SSE=0 USE_OPENCV=0" +ENV CC /usr/bin/aarch64-linux-gnu-gcc +ENV CXX /usr/bin/aarch64-linux-gnu-g++ +ENV FC /usr/bin/aarch64-linux-gnu-gfortran-4.9 +ENV HOSTCC gcc + +WORKDIR /work + +# Build OpenBLAS +ADD https://api.github.com/repos/xianyi/OpenBLAS/git/refs/heads/master /tmp/openblas_version.json +RUN git clone https://github.com/xianyi/OpenBLAS.git && \ + cd OpenBLAS && \ + make -j$(nproc) TARGET=ARMV8 && \ + make install && \ + ln -s /opt/OpenBLAS/lib/libopenblas.so /usr/lib/libopenblas.so && \ + ln -s /opt/OpenBLAS/lib/libopenblas.a /usr/lib/libopenblas.a && \ + ln -s /opt/OpenBLAS/lib/libopenblas.a /usr/lib/liblapack.a + +ENV LD_LIBRARY_PATH /opt/OpenBLAS/lib +ENV CPLUS_INCLUDE_PATH /opt/OpenBLAS/include + +# Build MXNet +#ADD https://api.github.com/repos/apache/incubator-mxnet/git/refs/heads/master mxnet_version.json +#RUN git clone --recursive https://github.com/apache/incubator-mxnet.git mxnet +ADD mxnet mxnet + +WORKDIR /work/mxnet +ADD arm.crosscompile.mk make/config.mk +RUN make -j$(nproc) $BUILD_OPTS + +WORKDIR /work/build/ +RUN cp /work/mxnet/lib/* . 
diff --git a/docker_multiarch/Dockerfile.build.armv6 b/docker_multiarch/Dockerfile.build.armv6 new file mode 100644 index 000000000000..9adfc5b49d42 --- /dev/null +++ b/docker_multiarch/Dockerfile.build.armv6 @@ -0,0 +1,38 @@ +# -*- mode: dockerfile -*- +# dockerfile to build libmxnet.so for armv7 +FROM dockcross/linux-armv6 + +ENV ARCH armv6l +ENV BUILD_OPTS "USE_BLAS=openblas USE_SSE=0 USE_OPENCV=0" +ENV CC /usr/bin/arm-linux-gnueabihf-gcc +ENV CXX /usr/bin/arm-linux-gnueabihf-g++ +ENV FC /usr/bin/arm-linux-gnueabihf-gfortran +ENV HOSTCC gcc + +WORKDIR /work + +# Build OpenBLAS +ADD https://api.github.com/repos/xianyi/OpenBLAS/git/refs/heads/master openblas_version.json +RUN git clone https://github.com/xianyi/OpenBLAS.git && \ + cd OpenBLAS && \ + make -j$(nproc) TARGET=ARMV6 && \ + make install && \ + ln -s /opt/OpenBLAS/lib/libopenblas.so /usr/lib/gcc/arm-linux-gnueabihf/libopenblas.so && \ + ln -s /opt/OpenBLAS/lib/libopenblas.a /usr/lib/gcc/arm-linux-gnueabihf/libopenblas.a && \ + ln -s /opt/OpenBLAS/lib/libopenblas.a /usr/lib/gcc/arm-linux-gnueabihf/liblapack.a && \ + ln -s /opt/OpenBLAS/lib/libopenblas.a /usr/lib/liblapack.a + +ENV LD_LIBRARY_PATH /opt/OpenBLAS/lib +ENV CPLUS_INCLUDE_PATH /opt/OpenBLAS/include + +# Build MXNet +#ADD https://api.github.com/repos/apache/incubator-mxnet/git/refs/heads/master mxnet_version.json +#RUN git clone --recursive https://github.com/apache/incubator-mxnet.git mxnet +ADD mxnet mxnet + +WORKDIR /work/mxnet +ADD arm.crosscompile.mk make/config.mk +RUN make -j$(nproc) $BUILD_OPTS + +WORKDIR /work/build/ +RUN cp /work/mxnet/lib/* . 
diff --git a/docker_multiarch/Dockerfile.build.armv7 b/docker_multiarch/Dockerfile.build.armv7 new file mode 100644 index 000000000000..740f2b22da64 --- /dev/null +++ b/docker_multiarch/Dockerfile.build.armv7 @@ -0,0 +1,26 @@ +# -*- mode: dockerfile -*- +# dockerfile to build libmxnet.so for armv7 +FROM dockcross/linux-armv7 + +RUN apt-get update && \ + apt-get install -y libopenblas-dev:armhf && \ + rm -rf /var/lib/apt/lists/* + +ENV ARCH armv71 +ENV CC /usr/bin/arm-linux-gnueabihf-gcc +ENV CXX /usr/bin/arm-linux-gnueabihf-g++ +ENV BUILD_OPTS "USE_OPENCV=0 USE_BLAS=openblas USE_SSE=0" + +# Build MXNet + +WORKDIR /work +#ADD https://api.github.com/repos/apache/incubator-mxnet/git/refs/heads/master mxnet_version.json +#RUN git clone --recursive https://github.com/apache/incubator-mxnet.git mxnet +ADD mxnet mxnet + +WORKDIR /work/mxnet +ADD arm.crosscompile.mk make/config.mk +RUN make -j$(nproc) $BUILD_OPTS + +WORKDIR /work/build/ +RUN cp /work/mxnet/lib/* . diff --git a/docker_multiarch/Dockerfile.build.cmake.ubuntu-17.04 b/docker_multiarch/Dockerfile.build.cmake.ubuntu-17.04 new file mode 100644 index 000000000000..cf0a981e04fc --- /dev/null +++ b/docker_multiarch/Dockerfile.build.cmake.ubuntu-17.04 @@ -0,0 +1,37 @@ +FROM ubuntu:17.04 + + +RUN apt-get update &&\ + apt-get install -y wget python3.5 gcc-4.9 gcc-5 g++-4.9 g++-5 cmake less python3-pip python3-dev\ + build-essential git pkgconf\ + libopenblas-dev liblapack-dev\ + maven default-jdk\ + ninja-build\ + libgtest-dev\ + &&\ + rm -rf /var/lib/apt/lists/* + + + +########################### +# Build gtest +WORKDIR /work/googletest +RUN cmake /usr/src/googletest/googletest/ -GNinja +RUN ninja +RUN cp libgtest.a /usr/lib +########################### + + + +WORKDIR /work +#RUN git clone --recursive https://github.com/apache/incubator-mxnet.git mxnet +ADD mxnet mxnet + +WORKDIR mxnet/build +RUN cmake -DUSE_CUDA=OFF -DUSE_OPENCV=OFF -GNinja .. 
+RUN ninja + + +# Copy artifacts +RUN mkdir -p /work/build +RUN cp *.a *.so /work/build diff --git a/docker_multiarch/Dockerfile.build.ubuntu-16.04-cuda_8.0_cudnn5 b/docker_multiarch/Dockerfile.build.ubuntu-16.04-cuda_8.0_cudnn5 new file mode 100644 index 000000000000..071a351b3125 --- /dev/null +++ b/docker_multiarch/Dockerfile.build.ubuntu-16.04-cuda_8.0_cudnn5 @@ -0,0 +1,32 @@ +FROM nvidia/cuda:8.0-cudnn5-devel + +RUN apt-get update &&\ + apt-get install -y wget python3.5 gcc-4.9 gcc-5 g++-4.9 g++-5 cmake less python3-pip python3-dev\ + build-essential git pkgconf\ + libopenblas-dev liblapack-dev\ + maven default-jdk\ + &&\ + rm -rf /var/lib/apt/lists/* + +WORKDIR /work +#RUN git clone --recursive https://github.com/apache/incubator-mxnet.git mxnet +ADD mxnet mxnet + +# Compile MxNet +ENV BUILD_OPTS "USE_OPENCV=0 USE_BLAS=openblas USE_CUDA=1 USE_CUDA_PATH=/usr/local/cuda USE_CUDNN=1" +WORKDIR /work/mxnet +RUN make -j$(nproc) $BUILD_OPTS + +WORKDIR /work/build/ +RUN cp /work/mxnet/lib/* . + +# Scala packag +#WORKDIR /work +#RUN wget --quiet http://downloads.lightbend.com/scala/2.11.8/scala-2.11.8.deb +#RUN dpkg -i scala-2.11.8.deb && rm scala-2.11.8.deb + +#WORKDIR /work/mxnet +#RUN make scalapkg $BUILD_OPTS + +#WORKDIR /work/build/scala_gpu +#RUN cp /work/mxnet/scala-package/assembly/linux-x86_64-gpu/target/*.jar . diff --git a/docker_multiarch/Dockerfile.build.ubuntu-17.04 b/docker_multiarch/Dockerfile.build.ubuntu-17.04 new file mode 100644 index 000000000000..63b3c0716d90 --- /dev/null +++ b/docker_multiarch/Dockerfile.build.ubuntu-17.04 @@ -0,0 +1,30 @@ +# +# Base image to build MXNet from source in ubuntu +# +# Other images depend on it, so build it like: +# +# docker build -f Dockerfile.build.ubuntu-17.04 -t mxnet.build.ubuntu-17.04 . 
+# +FROM ubuntu:17.04 + + +RUN apt-get update &&\ + apt-get install -y wget python3.5 gcc-4.9 gcc-5 g++-4.9 g++-5 cmake less python3-pip python3-dev\ + build-essential git pkgconf\ + libopenblas-dev liblapack-dev\ + maven default-jdk + +RUN rm -rf /var/lib/apt/lists/* + +WORKDIR /work +#RUN git clone --recursive https://github.com/apache/incubator-mxnet.git mxnet +ADD mxnet mxnet + +# Compile MxNet +ENV BUILD_OPTS "USE_OPENCV=0 USE_BLAS=openblas" +WORKDIR /work/mxnet +RUN make -j$(nproc) $BUILD_OPTS + +# Copy artifacts +WORKDIR /work/build/ +RUN cp /work/mxnet/lib/* . diff --git a/docker_multiarch/Dockerfile.build.ubuntu-17.04.scala.docker b/docker_multiarch/Dockerfile.build.ubuntu-17.04.scala.docker new file mode 100644 index 000000000000..a31ce893783c --- /dev/null +++ b/docker_multiarch/Dockerfile.build.ubuntu-17.04.scala.docker @@ -0,0 +1,17 @@ +# Before building this image you would need to build MXNet by executing: +# docker build -f Dockerfile.build.ubuntu-17.04 -t mxnet.build.ubuntu-17.04 . +# if you haven't done it before. + +FROM mxnet.build.ubuntu-17.04 + +# Scala package +WORKDIR /work +RUN wget --quiet http://downloads.lightbend.com/scala/2.11.8/scala-2.11.8.deb +RUN dpkg -i scala-2.11.8.deb && rm scala-2.11.8.deb + +WORKDIR /work/mxnet +RUN make scalapkg $BUILD_OPTS + +WORKDIR /work/build/scala +RUN cp /work/mxnet/scala-package/core/target/*.jar . +RUN cp /work/mxnet/scala-package/assembly/linux-x86_64-cpu/target/*.jar . diff --git a/docker_multiarch/Dockerfile.run.ubuntu-17.04.julia b/docker_multiarch/Dockerfile.run.ubuntu-17.04.julia new file mode 100644 index 000000000000..df3a036d83fe --- /dev/null +++ b/docker_multiarch/Dockerfile.run.ubuntu-17.04.julia @@ -0,0 +1,24 @@ +# Before building this image you would need to build MXNet by executing: +# docker build -f Dockerfile.build.ubuntu-17.04 -t mxnet.build.ubuntu-17.04 . +# if you haven't done it before. 
+ +FROM mxnet.build.ubuntu-17.04 + +ENV DEBIAN_FRONTEND=noninteractive + +################## +# Julia installation +RUN wget -q https://julialang.s3.amazonaws.com/bin/linux/x64/0.5/julia-0.5.1-linux-x86_64.tar.gz\ + && tar -zxf julia-0.5.1-linux-x86_64.tar.gz\ + && rm julia-0.5.1-linux-x86_64.tar.gz\ + && ln -s $(pwd)/julia-6445c82d00/bin/julia /usr/bin/julia +################## + + +ENV MXNET_HOME /work/mxnet +WORKDIR /work/mxnet +RUN julia -e 'Pkg.add("MXNet")' + + + + diff --git a/docker_multiarch/Dockerfile.run.ubuntu-17.04.perl b/docker_multiarch/Dockerfile.run.ubuntu-17.04.perl new file mode 100644 index 000000000000..a1a637def23b --- /dev/null +++ b/docker_multiarch/Dockerfile.run.ubuntu-17.04.perl @@ -0,0 +1,23 @@ +# Before building this image you would need to build MXNet by executing: +# docker build -f Dockerfile.build.ubuntu-17.04 -t mxnet.build.ubuntu-17.04 . +# if you haven't done it before. + +FROM mxnet.build.ubuntu-17.04 + +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt-get update +RUN apt-get install -y\ + libmouse-perl pdl cpanminus swig libgraphviz-perl +RUN rm -rf /var/lib/apt/lists/* + +RUN cpanm -q Function::Parameters + +WORKDIR /work/mxnet/perl-package/AI-MXNetCAPI +RUN perl Makefile.PL && make install + +WORKDIR /work/mxnet/perl-package/AI-NNVMCAPI/ +RUN perl Makefile.PL && make install + +WORKDIR /work/mxnet/perl-package/AI-MXNet/ +RUN perl Makefile.PL && make install diff --git a/docker_multiarch/Dockerfile.run.ubuntu-17.04.python b/docker_multiarch/Dockerfile.run.ubuntu-17.04.python new file mode 100644 index 000000000000..8bd262b891c4 --- /dev/null +++ b/docker_multiarch/Dockerfile.run.ubuntu-17.04.python @@ -0,0 +1,17 @@ +# Before building this image you would need to build MXNet by executing: +# docker build -f Dockerfile.build.ubuntu-17.04 -t mxnet.build.ubuntu-17.04 . +# if you haven't done it before. 
+ +FROM mxnet.build.ubuntu-17.04 + +ENV DEBIAN_FRONTEND=noninteractive + + +RUN apt-get update +RUN apt-get install -y python-pip +RUN rm -rf /var/lib/apt/lists/* + +WORKDIR /work/mxnet/python +RUN pip3 install -e . +RUN pip install -e . + diff --git a/docker_multiarch/Dockerfile.run.ubuntu-17.04.r b/docker_multiarch/Dockerfile.run.ubuntu-17.04.r new file mode 100644 index 000000000000..493c7f0ef179 --- /dev/null +++ b/docker_multiarch/Dockerfile.run.ubuntu-17.04.r @@ -0,0 +1,36 @@ +# Before building this image you would need to build MXNet by executing: +# docker build -f Dockerfile.build.ubuntu-17.04 -t mxnet.build.ubuntu-17.04 . +# if you haven't done it before. + +FROM mxnet.build.ubuntu-17.04 + +ENV DEBIAN_FRONTEND=noninteractive +#ENV BUILD_OPTS "USE_OPENCV=0 USE_BLAS=openblas GTEST_PATH=/usr/src/googletest/googletest" + +################## +# R installation +RUN apt-get update +#RUN apt-get remove -y gnupg +#RUN apt-get install -y --reinstall\ +# gnupg2 dirmngr + +RUN apt-get install -y dirmngr libopencv-dev +RUN echo "deb http://cran.rstudio.com/bin/linux/ubuntu zesty/" >> /etc/apt/sources.list +RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E084DAB9 + +RUN apt-get install -y\ + r-base r-base-core r-recommended r-base-dev libxml2-dev libxt-dev libssl-dev libcurl4-openssl-dev + + +WORKDIR /work/mxnet +RUN cp R-package/DESCRIPTION . 
+RUN Rscript -e "install.packages('devtools', repo = 'https://cran.rstudio.com')" +RUN Rscript -e "library(devtools); library(methods); options(repos=c(CRAN='https://cran.rstudio.com')); install_deps(dependencies = TRUE)" + + +################## +# MXNet R package +RUN make rpkg +RUN R CMD INSTALL mxnet_current_r.tar.gz +################## + diff --git a/docker_multiarch/Dockerfile.test.ubuntu-17.04 b/docker_multiarch/Dockerfile.test.ubuntu-17.04 new file mode 100644 index 000000000000..1b0c145f488d --- /dev/null +++ b/docker_multiarch/Dockerfile.test.ubuntu-17.04 @@ -0,0 +1,41 @@ +FROM ubuntu-17.04 +RUN apt-get update &&\ + apt-get install -y python3-nose python-nose python-pip libgtest-dev valgrind ninja-build\ + &&\ + rm -rf /var/lib/apt/lists/* + +########################### +# Unit tests +# Build google test +WORKDIR /work/googletest +RUN cmake /usr/src/googletest/googletest/ -GNinja +RUN ninja +# FIXME +RUN mkdir -p /usr/src/googletest/googletest/lib/ +RUN cp libgtest.a /usr/src/googletest/googletest/lib/ + +ENV BUILD_OPTS "USE_OPENCV=0 USE_BLAS=openblas GTEST_PATH=/usr/src/googletest/googletest" + +WORKDIR /work/mxnet +RUN make -j$(nproc) test $BUILD_OPTS +ENV MXNET_ENGINE_INFO=true +RUN build/tests/cpp/mxnet_test +RUN valgrind build/tests/cpp/mxnet_test +############################ + +############################ +# Python tests +WORKDIR /work/mxnet/python +RUN pip3 install -e . +RUN pip install -e . 
+ +WORKDIR /work/mxnet +RUN nosetests3 --verbose tests/python/unittest +RUN nosetests --verbose tests/python/unittest +############################ + + +############################ +# Scala tests +RUN make scalatest $BUILD_OPTS +############################ diff --git a/docker_multiarch/README.md b/docker_multiarch/README.md new file mode 100644 index 000000000000..a463d15bc82d --- /dev/null +++ b/docker_multiarch/README.md @@ -0,0 +1,42 @@ +# Dockerized multi-architecture build + +These docker files and utilities will build mxnet and run tests for different architectures using cross compilation and produce +runtime binary artifacts. + +These utilities require that you have docker installed. [Docker CE](https://docs.docker.com/engine/installation/linux/docker-ce/ubuntu/#install-docker) is recommended. + + +To compile for all the supported architectures you can run the script +``` +$ ./tool.py +``` + +To build a single arch, you can invoke docker directly: + +``` +$ docker build -f Dockerfile.build.<arch> -t <tag> . +``` + +Or call the dockerfile directly: + +``` +docker build -f <dockerfile> -t <tag> . +``` + +Or pass the architecture id to the tool: +``` +$ ./tool.py -a ubuntu-17.04 +``` + +By convention all the Dockerfiles produce the build artifacts in /work/build so they can be copied +after. + + +The tool will leave the resulting artifacts in the build/ directory. + +# TODO + +- Handle dependencies between docker files, for example having a yaml file with the dependency graph + so they can be built in the right order. Right now the dependency is very simple so simple + alphabetical sorting of the images does the trick.
+ diff --git a/make/pip_linux_cpu.mk b/docker_multiarch/arm.crosscompile.android.mk similarity index 80% rename from make/pip_linux_cpu.mk rename to docker_multiarch/arm.crosscompile.android.mk index 01bc2702ebb7..22a5bfb6810e 100644 --- a/make/pip_linux_cpu.mk +++ b/docker_multiarch/arm.crosscompile.android.mk @@ -1,13 +1,26 @@ #------------------------------------------------------------------------------- -# Template configuration for compiling mxnet for making python wheel +# Template configuration for compiling mxnet +# +# If you want to change the configuration, please use the following +# steps. Assume you are on the root directory of mxnet. First copy the this +# file so that any local changes will be ignored by git +# +# $ cp make/config.mk . +# +# Next modify the according entries, and then compile by +# +# $ make +# +# or build in parallel with 8 threads +# +# $ make -j8 #------------------------------------------------------------------------------- #--------------------- -# choice of compiler +# We do not assign compilers here. Often when cross-compiling these will already +# be set correctly. 
#-------------------- -export CC = gcc -export CXX = g++ export NVCC = nvcc # whether compile with options for MXNet developer @@ -20,32 +33,16 @@ DEBUG = 0 USE_PROFILER = # the additional link flags you want to add -ADD_LDFLAGS += -lopencv_core -lopencv_imgproc -lopencv_highgui +# TODO: Move flags here +ADD_LDFLAGS=-static-libstdc++ -L/work/OpenBLAS/ # the additional compile flags you want to add -ADD_CFLAGS += -Ldeps/lib -Ideps/include +ADD_CFLAGS = #--------------------------------------------- # matrix computation libraries for CPU/GPU #--------------------------------------------- -# choose the version of blas you want to use -# can be: mkl, blas, atlas, openblas -# in default use atlas for linux while apple for osx -USE_BLAS=openblas - -# whether use lapack during compilation -# only effective when compiled with blas versions openblas/apple/atlas/mkl -USE_LAPACK = 1 - -# path to lapack library in case of a non-standard installation -USE_LAPACK_PATH = - -# whether use opencv during compilation -# you can disable it, however, you will not able to use -# imbin iterator -USE_OPENCV = 1 - # whether use CUDA during compile USE_CUDA = 0 @@ -60,10 +57,14 @@ USE_CUDNN = 0 # whether use cuda runtime compiling for writing kernels in native language (i.e. 
Python) USE_NVRTC = 0 +# whether use opencv during compilation +# you can disable it, however, you will not able to use +# imbin iterator +USE_OPENCV = 0 + # use openmp for parallelization USE_OPENMP = 1 - # MKL ML Library for Intel CPU/Xeon Phi # Please refer to MKL_README.md for details @@ -82,25 +83,27 @@ USE_MKL2017_EXPERIMENTAL = 0 # whether use NNPACK library USE_NNPACK = 0 +# For arm builds we're using openblas +USE_BLAS = openblas + +# whether use lapack during compilation +# only effective when compiled with blas versions openblas/apple/atlas/mkl +USE_LAPACK = 0 + +# path to lapack library in case of a non-standard installation +USE_LAPACK_PATH = + # add path to intel library, you may need it for MKL, if you did not add the path # to environment variable USE_INTEL_PATH = NONE -# If use MKL, choose static link automatically to allow python wrapper +# If use MKL only for BLAS, choose static link automatically to allow python wrapper +ifeq ($(USE_MKL2017), 0) ifeq ($(USE_BLAS), mkl) USE_STATIC_MKL = 1 -else -USE_STATIC_MKL = NONE endif - -#---------------------------- -# Settings for power and arm arch -#---------------------------- -ARCH := $(shell uname -a) -ifneq (,$(filter $(ARCH), armv6l armv7l powerpc64le ppc64le aarch64)) - USE_SSE=0 else - USE_SSE=1 +USE_STATIC_MKL = NONE endif #---------------------------- @@ -129,6 +132,12 @@ USE_S3 = 0 # path to folders containing projects specific operators that you don't want to put in src/operators EXTRA_OPERATORS = +#---------------------------- +# other features +#---------------------------- + +# Create C++ interface package +USE_CPP_PACKAGE = 0 #---------------------------- # plugins diff --git a/docker_multiarch/arm.crosscompile.mk b/docker_multiarch/arm.crosscompile.mk new file mode 100644 index 000000000000..fea4e7777c22 --- /dev/null +++ b/docker_multiarch/arm.crosscompile.mk @@ -0,0 +1,162 @@ +#------------------------------------------------------------------------------- +# Template configuration for 
compiling mxnet +# +# If you want to change the configuration, please use the following +# steps. Assume you are on the root directory of mxnet. First copy the this +# file so that any local changes will be ignored by git +# +# $ cp make/config.mk . +# +# Next modify the according entries, and then compile by +# +# $ make +# +# or build in parallel with 8 threads +# +# $ make -j8 +#------------------------------------------------------------------------------- + +#--------------------- +# We do not assign compilers here. Often when cross-compiling these will already +# be set correctly. +#-------------------- + +export NVCC = nvcc + +# whether compile with options for MXNet developer +DEV = 0 + +# whether compile with debug +DEBUG = 0 + +# whether compiler with profiler +USE_PROFILER = + +# the additional link flags you want to add +# TODO: Move flags here +ADD_LDFLAGS=-static-libstdc++ + +# the additional compile flags you want to add +ADD_CFLAGS = + +#--------------------------------------------- +# matrix computation libraries for CPU/GPU +#--------------------------------------------- + +# whether use CUDA during compile +USE_CUDA = 0 + +# add the path to CUDA library to link and compile flag +# if you have already add them to environment variable, leave it as NONE +# USE_CUDA_PATH = /usr/local/cuda +USE_CUDA_PATH = NONE + +# whether use CuDNN R3 library +USE_CUDNN = 0 + +# whether use cuda runtime compiling for writing kernels in native language (i.e. 
Python) +USE_NVRTC = 0 + +# whether use opencv during compilation +# you can disable it, however, you will not able to use +# imbin iterator +USE_OPENCV = 0 + +# use openmp for parallelization +USE_OPENMP = 1 + +# MKL ML Library for Intel CPU/Xeon Phi +# Please refer to MKL_README.md for details + +# MKL ML Library folder, need to be root for /usr/local +# Change to User Home directory for standard user +# For USE_BLAS!=mkl only +MKLML_ROOT=/usr/local + +# whether use MKL2017 library +USE_MKL2017 = 0 + +# whether use MKL2017 experimental feature for high performance +# Prerequisite USE_MKL2017=1 +USE_MKL2017_EXPERIMENTAL = 0 + +# whether use NNPACK library +USE_NNPACK = 0 + +# For arm builds we're using openblas +USE_BLAS = openblas + +# whether use lapack during compilation +# only effective when compiled with blas versions openblas/apple/atlas/mkl +USE_LAPACK = 1 + +# path to lapack library in case of a non-standard installation +USE_LAPACK_PATH = + +# add path to intel library, you may need it for MKL, if you did not add the path +# to environment variable +USE_INTEL_PATH = NONE + +# If use MKL only for BLAS, choose static link automatically to allow python wrapper +ifeq ($(USE_MKL2017), 0) +ifeq ($(USE_BLAS), mkl) +USE_STATIC_MKL = 1 +endif +else +USE_STATIC_MKL = NONE +endif + +#---------------------------- +# distributed computing +#---------------------------- + +# whether or not to enable multi-machine supporting +USE_DIST_KVSTORE = 0 + +# whether or not allow to read and write HDFS directly. If yes, then hadoop is +# required +USE_HDFS = 0 + +# path to libjvm.so. required if USE_HDFS=1 +LIBJVM=$(JAVA_HOME)/jre/lib/amd64/server + +# whether or not allow to read and write AWS S3 directly. 
If yes, then +# libcurl4-openssl-dev is required, it can be installed on Ubuntu by +# sudo apt-get install -y libcurl4-openssl-dev +USE_S3 = 0 + +#---------------------------- +# additional operators +#---------------------------- + +# path to folders containing projects specific operators that you don't want to put in src/operators +EXTRA_OPERATORS = + +#---------------------------- +# other features +#---------------------------- + +# Create C++ interface package +USE_CPP_PACKAGE = 0 + +#---------------------------- +# plugins +#---------------------------- + +# whether to use caffe integration. This requires installing caffe. +# You also need to add CAFFE_PATH/build/lib to your LD_LIBRARY_PATH +# CAFFE_PATH = $(HOME)/caffe +# MXNET_PLUGINS += plugin/caffe/caffe.mk + +# whether to use torch integration. This requires installing torch. +# You also need to add TORCH_PATH/install/lib to your LD_LIBRARY_PATH +# TORCH_PATH = $(HOME)/torch +# MXNET_PLUGINS += plugin/torch/torch.mk + +# WARPCTC_PATH = $(HOME)/warp-ctc +# MXNET_PLUGINS += plugin/warpctc/warpctc.mk + +# whether to use sframe integration. This requires build sframe +# git@github.com:dato-code/SFrame.git +# SFRAME_PATH = $(HOME)/SFrame +# MXNET_PLUGINS += plugin/sframe/plugin.mk \ No newline at end of file diff --git a/docker_multiarch/tool.py b/docker_multiarch/tool.py new file mode 100755 index 000000000000..d0003ec05822 --- /dev/null +++ b/docker_multiarch/tool.py @@ -0,0 +1,155 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Multi arch dockerized build tool. + +""" + +__author__ = 'Pedro Larroy' +__version__ = '0.1' + +import os +import sys +import subprocess +import logging +import argparse +from subprocess import check_call +import glob +import re + +class CmdResult(object): + def __init__(self, std_out, std_err, status_code): + self.std_out = std_out + self.std_err = std_err + self.status_code = status_code if status_code is not None else 0 + + def __str__(self): + return "%s, %s, %s" % (self.std_out, self.std_err, self.status_code) + +def run(cmd, fail_on_error=True): + logging.debug("executing shell command:\n" + cmd) + proc = subprocess.Popen( + cmd, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + std_out, std_err = proc.communicate() + if fail_on_error: + if proc.returncode != 0: + logging.warn('Error running command: {}'.format(cmd)) + assert proc.returncode == 0, std_err + res = CmdResult(std_out.decode('utf-8'), std_err.decode('utf-8'), proc.returncode) + return res + + +def mkdir_p(d): + rev_path_list = list() + head = d + while len(head) and head != os.sep: + rev_path_list.append(head) + (head, tail) = os.path.split(head) + + rev_path_list.reverse() + for p in rev_path_list: + try: + os.mkdir(p) + except OSError as e: + if e.errno != 17: + raise + +def get_arches(): + """Get a list of architectures given our dockerfiles""" + dockerfiles = glob.glob("Dockerfile.build.*") + dockerfiles = list(filter(lambda x: x[-1] != '~', dockerfiles)) + arches = list(map(lambda x: re.sub(r"Dockerfile.build.(.*)", r"\1", x), dockerfiles)) + arches.sort() 
+ return arches + +def sync_source(): + logging.info("Copying sources") + check_call(["rsync","-a","--delete","--exclude=\".git/\"",'--exclude=/docker_multiarch/',"../","mxnet"]) + +def get_docker_tag(arch): + return "mxnet.build.{0}".format(arch) + +def get_dockerfile(arch): + return "Dockerfile.build.{0}".format(arch) + +def build(arch): + """Build the given architecture in the container""" + assert arch in get_arches(), "No such architecture {0}, Dockerfile.build.{0} not found".format(arch) + logging.info("Building for target platform {0}".format(arch)) + check_call(["docker", "build", + "-f", get_dockerfile(arch), + "-t", get_docker_tag(arch), + "."]) + +def collect_artifacts(arch): + """Collects the artifacts built inside the docker container to the local fs""" + def artifact_path(arch): + return "{}/build/{}".format(os.getcwd(), arch) + logging.info("Collect artifacts from build in {0}".format(artifact_path(arch))) + mkdir_p("build/{}".format(arch)) + + # Mount artifact_path on /$arch inside the container and copy the build output so we can access + # locally from the host fs + check_call(["docker","run", + "-v", "{}:/{}".format(artifact_path(arch), arch), + get_docker_tag(arch), + "bash", "-c", "cp -r /work/build/* /{}".format(arch)]) + +def main(): + logging.getLogger().setLevel(logging.INFO) + logging.basicConfig(format='%(asctime)-15s %(message)s') + + parser = argparse.ArgumentParser() + parser.add_argument("-a", "--arch", + help="Architecture", + type=str) + + parser.add_argument("-l", "--list_arch", + help="List architectures", + action='store_true') + args = parser.parse_args() + + if args.list_arch: + arches = get_arches() + print(arches) + + elif args.arch: + sync_source() + build(args.arch) + collect_artifacts(args.arch) + + else: + arches = get_arches() + logging.info("Building for all architectures: {}".format(arches)) + logging.info("Artifacts will be produced in the build/ directory.") + sync_source() + for arch in arches: + build(arch) + 
collect_artifacts(arch) + + return 0 + +if __name__ == '__main__': + sys.exit(main()) + diff --git a/docs/README.md b/docs/README.md index 7780f8961cc6..ad64b76d648b 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,22 +1,91 @@ # MXNet documentation +## How to build MXNet website + +MXNet Documentation Website is built with [sphinx 1.5.1](http://www.sphinx-doc.org/en/1.5.1/intro.html). + A built version of document is available at http://mxnet.io -To build the documents locally, we need to first install [docker](docker.com). +To build the documents locally, we need to first install [docker](https://docker.com). Then use the following commands to clone and build the documents. ```bash -git clone --recursive https://github.com/dmlc/mxnet +git clone --recursive https://github.com/apache/incubator-mxnet.git mxnet cd mxnet && make docs ``` +In case docker method is not available, there is an alternate method: +```bash +sudo pip install sphinx==1.5.1 CommonMark==0.5.4 breathe mock==1.0.1 recommonmark pypandoc +cd mxnet/docs && make html USE_OPENMP=0 +``` + The results will be available at `docs/_build/html/`. Note: - If C++ codes have been changed, we suggest to remove the previous results to trigger the rebuild for all pages, namely run `make clean_docs`. -- If C++ codes are failed to build, run `make clean` +- If C++ code fails to build, run `make clean` - If CSS or javascript are changed, we often need to do a *force refresh* in the browser to clear the cache. +- If search doesn't work, we need to `make clean` and rebuild. + +## File structure + +1. Static files such as css, javascript and html templates are under `_static` folder: +- Javascript files are under `_static/js` folder. +- Layout templates and landing page html file are under `_static/mxnet-theme` folder. +- `_static/mxnet.css` contains all MXNet website styles. + +2. Sphinx converts markdowns files to html. Page contents are markdown files. Each content folder +contains an index file as landing page. 
+ +3. There are some utility scripts to help building website, such as `mxdoc.py` and `build_version_doc/`. +They are used to manipulate website contents during building. + +## Production website building process + +[Apache Jenkins MXNet website building job](https://builds.apache.org/job/incubator-mxnet-build-site/) is used to build MXNet website. +There are two ways to trigger this job. +First is nightly build for master branch. +Second is to manually trigger the job when a new version is released. This will build for new version. + +The job will fetch mxnet repository, build MXNet website and push all static files to [host repository](https://github.com/apache/incubator-mxnet-site.git). +The host repo is hooked with [Apache gitbox](https://gitbox.apache.org/repos/asf?p=incubator-mxnet-site.git;a=summary) to host website. + +## Build versioning website + +`make docs` doesn't add any version information. Version information is added by [Apache Jenkins MXNet website building job](https://builds.apache.org/job/incubator-mxnet-build-site/). +Landing page will point to the latest released version. Older versions and master version are placed under versions folder. +After completing website update and testing it locally, we also need to build and test versioning website. + +Python Beautifulsoup is the dependency: + +```bash +sudo pip install beautifulsoup4 +``` + +The essential part of adding version is to use `AddPackageLink.py` to add Apache source packages and +`AddVersion.py` to update all version related information on website. These two scripts are used in `build_doc.sh` and `build_all_version`. + +`build_doc.sh` is used by Apache Jenkins MXNet website building job to incrementally add versions. We don't need it +for local website development. + +`build_all_version.sh` is to rebuild versioning website locally and is useful to verify versioning website locally. +We need to specify which versions to be built.
This can be set in `tag_list` variable at the beginning of the script. +Version order should be from latest to oldest and placing master at the end. We may also want to modify `mxnet_url` +variable to our own repository for local testing. Another use case is to completely rebuild website with specific versions. +Although this will not happen often, we can use it to rebuild the whole website and push to [host repo](https://github.com/apache/incubator-mxnet-site.git). + +```bash +./build_all_version.sh +``` + +## Develop notes + +1. `AddVersion.py` depends on the Beautifulsoup library, which requires target html files to have close tags. Although open tag html can still be rendered by browser, it will be problematic for Beautifulsoup. + +2. `AddVersion.py` and `AddPackageLink.py` manipulate contents for the website. If there are layout changes, they may break these two scripts. We need to update these scripts accordingly. + diff --git a/docs/_static/cn.svg b/docs/_static/cn.svg index 515176d60f15..9fb3fc084c3c 100644 --- a/docs/_static/cn.svg +++ b/docs/_static/cn.svg @@ -1,4 +1,24 @@ + + + Flag of the People's Republic of China diff --git a/docs/_static/jquery-1.11.1.js b/docs/_static/jquery-1.11.1.js index d4b67f7e6c1a..a0ee94130c93 100644 --- a/docs/_static/jquery-1.11.1.js +++ b/docs/_static/jquery-1.11.1.js @@ -6,7 +6,28 @@ * http://sizzlejs.com/ * * Copyright 2005, 2014 jQuery Foundation, Inc.
and other contributors + * ---- * Released under the MIT license + * MIT License + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * ---- * http://jquery.org/license * * Date: 2014-05-01T17:42Z diff --git a/docs/_static/js/auto_module_index.js b/docs/_static/js/auto_module_index.js index 7f4e185655d3..83bdbf37173b 100644 --- a/docs/_static/js/auto_module_index.js +++ b/docs/_static/js/auto_module_index.js @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + function auto_index(module) { $(document).ready(function () { // find all classes or functions @@ -21,4 +40,4 @@ function auto_index(module) { html += ""; li_node.append(html); }); -} \ No newline at end of file +} diff --git a/docs/_static/js/clipboard.min.js b/docs/_static/js/clipboard.min.js old mode 100755 new mode 100644 index 1993676f9928..a23c4e1384d5 --- a/docs/_static/js/clipboard.min.js +++ b/docs/_static/js/clipboard.min.js @@ -1,7 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + /*! 
* clipboard.js v1.6.1 * https://zenorocha.github.io/clipboard.js * * Licensed MIT © Zeno Rocha */ -!function(e){if("object"==typeof exports&&"undefined"!=typeof module)module.exports=e();else if("function"==typeof define&&define.amd)define([],e);else{var t;t="undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:this,t.Clipboard=e()}}(function(){var e,t,n;return function e(t,n,o){function i(a,c){if(!n[a]){if(!t[a]){var l="function"==typeof require&&require;if(!c&&l)return l(a,!0);if(r)return r(a,!0);var u=new Error("Cannot find module '"+a+"'");throw u.code="MODULE_NOT_FOUND",u}var s=n[a]={exports:{}};t[a][0].call(s.exports,function(e){var n=t[a][1][e];return i(n?n:e)},s,s.exports,e,t,n,o)}return n[a].exports}for(var r="function"==typeof require&&require,a=0;a0&&void 0!==arguments[0]?arguments[0]:{};this.action=t.action,this.emitter=t.emitter,this.target=t.target,this.text=t.text,this.trigger=t.trigger,this.selectedText=""}},{key:"initSelection",value:function e(){this.text?this.selectFake():this.target&&this.selectTarget()}},{key:"selectFake",value:function e(){var t=this,n="rtl"==document.documentElement.getAttribute("dir");this.removeFake(),this.fakeHandlerCallback=function(){return t.removeFake()},this.fakeHandler=document.body.addEventListener("click",this.fakeHandlerCallback)||!0,this.fakeElem=document.createElement("textarea"),this.fakeElem.style.fontSize="12pt",this.fakeElem.style.border="0",this.fakeElem.style.padding="0",this.fakeElem.style.margin="0",this.fakeElem.style.position="absolute",this.fakeElem.style[n?"right":"left"]="-9999px";var o=window.pageYOffset||document.documentElement.scrollTop;this.fakeElem.style.top=o+"px",this.fakeElem.setAttribute("readonly",""),this.fakeElem.value=this.text,document.body.appendChild(this.fakeElem),this.selectedText=(0,i.default)(this.fakeElem),this.copyText()}},{key:"removeFake",value:function 
e(){this.fakeHandler&&(document.body.removeEventListener("click",this.fakeHandlerCallback),this.fakeHandler=null,this.fakeHandlerCallback=null),this.fakeElem&&(document.body.removeChild(this.fakeElem),this.fakeElem=null)}},{key:"selectTarget",value:function e(){this.selectedText=(0,i.default)(this.target),this.copyText()}},{key:"copyText",value:function e(){var t=void 0;try{t=document.execCommand(this.action)}catch(e){t=!1}this.handleResult(t)}},{key:"handleResult",value:function e(t){this.emitter.emit(t?"success":"error",{action:this.action,text:this.selectedText,trigger:this.trigger,clearSelection:this.clearSelection.bind(this)})}},{key:"clearSelection",value:function e(){this.target&&this.target.blur(),window.getSelection().removeAllRanges()}},{key:"destroy",value:function e(){this.removeFake()}},{key:"action",set:function e(){var t=arguments.length>0&&void 0!==arguments[0]?arguments[0]:"copy";if(this._action=t,"copy"!==this._action&&"cut"!==this._action)throw new Error('Invalid "action" value, use either "copy" or "cut"')},get:function e(){return this._action}},{key:"target",set:function e(t){if(void 0!==t){if(!t||"object"!==("undefined"==typeof t?"undefined":r(t))||1!==t.nodeType)throw new Error('Invalid "target" value, use a valid Element');if("copy"===this.action&&t.hasAttribute("disabled"))throw new Error('Invalid "target" attribute. Please use "readonly" instead of "disabled" attribute');if("cut"===this.action&&(t.hasAttribute("readonly")||t.hasAttribute("disabled")))throw new Error('Invalid "target" attribute. 
You can\'t cut text from elements with "readonly" or "disabled" attributes');this._target=t}},get:function e(){return this._target}}]),e}();e.exports=c})},{select:5}],8:[function(t,n,o){!function(i,r){if("function"==typeof e&&e.amd)e(["module","./clipboard-action","tiny-emitter","good-listener"],r);else if("undefined"!=typeof o)r(n,t("./clipboard-action"),t("tiny-emitter"),t("good-listener"));else{var a={exports:{}};r(a,i.clipboardAction,i.tinyEmitter,i.goodListener),i.clipboard=a.exports}}(this,function(e,t,n,o){"use strict";function i(e){return e&&e.__esModule?e:{default:e}}function r(e,t){if(!(e instanceof t))throw new TypeError("Cannot call a class as a function")}function a(e,t){if(!e)throw new ReferenceError("this hasn't been initialised - super() hasn't been called");return!t||"object"!=typeof t&&"function"!=typeof t?e:t}function c(e,t){if("function"!=typeof t&&null!==t)throw new TypeError("Super expression must either be null or a function, not "+typeof t);e.prototype=Object.create(t&&t.prototype,{constructor:{value:e,enumerable:!1,writable:!0,configurable:!0}}),t&&(Object.setPrototypeOf?Object.setPrototypeOf(e,t):e.__proto__=t)}function l(e,t){var n="data-clipboard-"+e;if(t.hasAttribute(n))return t.getAttribute(n)}var u=i(t),s=i(n),f=i(o),d=function(){function e(e,t){for(var n=0;n0&&void 0!==arguments[0]?arguments[0]:{};this.action="function"==typeof t.action?t.action:this.defaultAction,this.target="function"==typeof t.target?t.target:this.defaultTarget,this.text="function"==typeof t.text?t.text:this.defaultText}},{key:"listenClick",value:function e(t){var n=this;this.listener=(0,f.default)(t,"click",function(e){return n.onClick(e)})}},{key:"onClick",value:function e(t){var n=t.delegateTarget||t.currentTarget;this.clipboardAction&&(this.clipboardAction=null),this.clipboardAction=new u.default({action:this.action(n),target:this.target(n),text:this.text(n),trigger:n,emitter:this})}},{key:"defaultAction",value:function e(t){return 
l("action",t)}},{key:"defaultTarget",value:function e(t){var n=l("target",t);if(n)return document.querySelector(n)}},{key:"defaultText",value:function e(t){return l("text",t)}},{key:"destroy",value:function e(){this.listener.destroy(),this.clipboardAction&&(this.clipboardAction.destroy(),this.clipboardAction=null)}}],[{key:"isSupported",value:function e(){var t=arguments.length>0&&void 0!==arguments[0]?arguments[0]:["copy","cut"],n="string"==typeof t?[t]:t,o=!!document.queryCommandSupported;return n.forEach(function(e){o=o&&!!document.queryCommandSupported(e)}),o}}]),t}(s.default);e.exports=h})},{"./clipboard-action":7,"good-listener":4,"tiny-emitter":6}]},{},[8])(8)}); \ No newline at end of file +!function(e){if("object"==typeof exports&&"undefined"!=typeof module)module.exports=e();else if("function"==typeof define&&define.amd)define([],e);else{var t;t="undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:this,t.Clipboard=e()}}(function(){var e,t,n;return function e(t,n,o){function i(a,c){if(!n[a]){if(!t[a]){var l="function"==typeof require&&require;if(!c&&l)return l(a,!0);if(r)return r(a,!0);var u=new Error("Cannot find module '"+a+"'");throw u.code="MODULE_NOT_FOUND",u}var s=n[a]={exports:{}};t[a][0].call(s.exports,function(e){var n=t[a][1][e];return i(n?n:e)},s,s.exports,e,t,n,o)}return n[a].exports}for(var r="function"==typeof require&&require,a=0;a0&&void 0!==arguments[0]?arguments[0]:{};this.action=t.action,this.emitter=t.emitter,this.target=t.target,this.text=t.text,this.trigger=t.trigger,this.selectedText=""}},{key:"initSelection",value:function e(){this.text?this.selectFake():this.target&&this.selectTarget()}},{key:"selectFake",value:function e(){var t=this,n="rtl"==document.documentElement.getAttribute("dir");this.removeFake(),this.fakeHandlerCallback=function(){return 
t.removeFake()},this.fakeHandler=document.body.addEventListener("click",this.fakeHandlerCallback)||!0,this.fakeElem=document.createElement("textarea"),this.fakeElem.style.fontSize="12pt",this.fakeElem.style.border="0",this.fakeElem.style.padding="0",this.fakeElem.style.margin="0",this.fakeElem.style.position="absolute",this.fakeElem.style[n?"right":"left"]="-9999px";var o=window.pageYOffset||document.documentElement.scrollTop;this.fakeElem.style.top=o+"px",this.fakeElem.setAttribute("readonly",""),this.fakeElem.value=this.text,document.body.appendChild(this.fakeElem),this.selectedText=(0,i.default)(this.fakeElem),this.copyText()}},{key:"removeFake",value:function e(){this.fakeHandler&&(document.body.removeEventListener("click",this.fakeHandlerCallback),this.fakeHandler=null,this.fakeHandlerCallback=null),this.fakeElem&&(document.body.removeChild(this.fakeElem),this.fakeElem=null)}},{key:"selectTarget",value:function e(){this.selectedText=(0,i.default)(this.target),this.copyText()}},{key:"copyText",value:function e(){var t=void 0;try{t=document.execCommand(this.action)}catch(e){t=!1}this.handleResult(t)}},{key:"handleResult",value:function e(t){this.emitter.emit(t?"success":"error",{action:this.action,text:this.selectedText,trigger:this.trigger,clearSelection:this.clearSelection.bind(this)})}},{key:"clearSelection",value:function e(){this.target&&this.target.blur(),window.getSelection().removeAllRanges()}},{key:"destroy",value:function e(){this.removeFake()}},{key:"action",set:function e(){var t=arguments.length>0&&void 0!==arguments[0]?arguments[0]:"copy";if(this._action=t,"copy"!==this._action&&"cut"!==this._action)throw new Error('Invalid "action" value, use either "copy" or "cut"')},get:function e(){return this._action}},{key:"target",set:function e(t){if(void 0!==t){if(!t||"object"!==("undefined"==typeof t?"undefined":r(t))||1!==t.nodeType)throw new Error('Invalid "target" value, use a valid Element');if("copy"===this.action&&t.hasAttribute("disabled"))throw 
new Error('Invalid "target" attribute. Please use "readonly" instead of "disabled" attribute');if("cut"===this.action&&(t.hasAttribute("readonly")||t.hasAttribute("disabled")))throw new Error('Invalid "target" attribute. You can\'t cut text from elements with "readonly" or "disabled" attributes');this._target=t}},get:function e(){return this._target}}]),e}();e.exports=c})},{select:5}],8:[function(t,n,o){!function(i,r){if("function"==typeof e&&e.amd)e(["module","./clipboard-action","tiny-emitter","good-listener"],r);else if("undefined"!=typeof o)r(n,t("./clipboard-action"),t("tiny-emitter"),t("good-listener"));else{var a={exports:{}};r(a,i.clipboardAction,i.tinyEmitter,i.goodListener),i.clipboard=a.exports}}(this,function(e,t,n,o){"use strict";function i(e){return e&&e.__esModule?e:{default:e}}function r(e,t){if(!(e instanceof t))throw new TypeError("Cannot call a class as a function")}function a(e,t){if(!e)throw new ReferenceError("this hasn't been initialised - super() hasn't been called");return!t||"object"!=typeof t&&"function"!=typeof t?e:t}function c(e,t){if("function"!=typeof t&&null!==t)throw new TypeError("Super expression must either be null or a function, not "+typeof t);e.prototype=Object.create(t&&t.prototype,{constructor:{value:e,enumerable:!1,writable:!0,configurable:!0}}),t&&(Object.setPrototypeOf?Object.setPrototypeOf(e,t):e.__proto__=t)}function l(e,t){var n="data-clipboard-"+e;if(t.hasAttribute(n))return t.getAttribute(n)}var u=i(t),s=i(n),f=i(o),d=function(){function e(e,t){for(var n=0;n0&&void 0!==arguments[0]?arguments[0]:{};this.action="function"==typeof t.action?t.action:this.defaultAction,this.target="function"==typeof t.target?t.target:this.defaultTarget,this.text="function"==typeof t.text?t.text:this.defaultText}},{key:"listenClick",value:function e(t){var n=this;this.listener=(0,f.default)(t,"click",function(e){return n.onClick(e)})}},{key:"onClick",value:function e(t){var 
n=t.delegateTarget||t.currentTarget;this.clipboardAction&&(this.clipboardAction=null),this.clipboardAction=new u.default({action:this.action(n),target:this.target(n),text:this.text(n),trigger:n,emitter:this})}},{key:"defaultAction",value:function e(t){return l("action",t)}},{key:"defaultTarget",value:function e(t){var n=l("target",t);if(n)return document.querySelector(n)}},{key:"defaultText",value:function e(t){return l("text",t)}},{key:"destroy",value:function e(){this.listener.destroy(),this.clipboardAction&&(this.clipboardAction.destroy(),this.clipboardAction=null)}}],[{key:"isSupported",value:function e(){var t=arguments.length>0&&void 0!==arguments[0]?arguments[0]:["copy","cut"],n="string"==typeof t?[t]:t,o=!!document.queryCommandSupported;return n.forEach(function(e){o=o&&!!document.queryCommandSupported(e)}),o}}]),t}(s.default);e.exports=h})},{"./clipboard-action":7,"good-listener":4,"tiny-emitter":6}]},{},[8])(8)}); diff --git a/docs/_static/js/copycode.js b/docs/_static/js/copycode.js index 141e797fa2c2..b1c268cfec3b 100644 --- a/docs/_static/js/copycode.js +++ b/docs/_static/js/copycode.js @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + /*Copy code to clipboard*/ LANG_GP = {'default':'>>> ', 'python':'>>> ' , 'scala':'scala>', 'julia':'julia> ', 'r':'> ', 'perl':'pdl>' , 'cpp':'', 'bash':'$ '}; @@ -6,6 +25,7 @@ function addBtn() { 'data-placement="bottom" title="Copy to clipboard">' for (var lang in LANG_GP) { codeBlock = $('div .highlight-' + lang); + codeBlock.css('position', 'relative') codeBlock.prepend(copyBtn); codeBlock.find('.copy-btn').addClass(lang); codeBlock.hover( diff --git a/docs/_static/js/navbar.js b/docs/_static/js/navbar.js index 91e0356d9263..e3601c409ee0 100644 --- a/docs/_static/js/navbar.js +++ b/docs/_static/js/navbar.js @@ -1,9 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + var searchBox = $("#search-input-wrap"); -var TITLE = ['/get_started/', '/tutorials/', '/how_to/', '/api/', '/architecture/']; -var APIsubMenu; +var TITLE = ['/get_started/', '/tutorials/', '/gluon/' , '/api/', '/community/contribute.html', ]; +var DOC_TITLE = ['/faq/', '/architecture/', '/model_zoo/']; +var APISubmenu, versionSubmenu, docSubmenu; $("#burgerMenu").children().each(function () { - if($(this).children().first().html() == 'API') APIsubMenu = $(this).clone() - if($(this).children().first().html().startsWith('Versions')) VersionsubMenu = $(this).clone() + if($(this).children().first().html() == 'API') APISubmenu = $(this).clone(); + if($(this).children().first().html().startsWith('Versions')) versionSubmenu = $(this).clone(); + if($(this).children().first().html() == 'Docs') docSubmenu= $(this).clone(); }); function navbar() { @@ -40,10 +61,13 @@ function navbar() { $("#plusMenu").empty(); for (var i = 0; i < plusMenuList.length; ++i) { if(plusMenuList[i].attr('id') == 'dropdown-menu-position-anchor') { - $("#plusMenu").append(APIsubMenu); + $("#plusMenu").append(APISubmenu); } else if(plusMenuList[i].attr('id') == 'dropdown-menu-position-anchor-version') { - $("#plusMenu").append(VersionsubMenu); + $("#plusMenu").append(versionSubmenu); + } + else if(plusMenuList[i].attr('id') == 'dropdown-menu-position-anchor-docs') { + $("#plusMenu").append(docSubmenu); } else { $("#plusMenu").append("
  • "); @@ -62,8 +86,16 @@ function showTab() { var tab = $($('#main-nav').children().eq(i)); if(!tab.is('a')) tab = tab.find('a').first(); tab.css('border-bottom', '3px solid'); + return; } } + for(var i = 0; i < DOC_TITLE.length; ++i) { + if(url.indexOf(DOC_TITLE[i]) != -1) { + var tab = $($('#main-nav').children().eq(4)); + if(!tab.is('a')) tab = tab.find('a').first(); + tab.css('border-bottom', '3px solid'); + } + } } $(document).ready(function () { @@ -71,5 +103,7 @@ $(document).ready(function () { showTab(); $(window).resize(function () { navbar(); + if($("body").prop("clientWidth") < 1000 || $('div.sphinxsidebar').css('visibility') == 'hidden') $('div.content').css('width', '100%'); + else $('div.content').css('width', 'calc(100% - 300px)'); }); -}); \ No newline at end of file +}); diff --git a/docs/_static/js/options.js b/docs/_static/js/options.js index 77ef94074c57..6e285df88638 100644 --- a/docs/_static/js/options.js +++ b/docs/_static/js/options.js @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + $(document).ready(function () { function label(lbl) { return lbl.replace(/[ .]/g, '-').toLowerCase(); diff --git a/docs/_static/js/page.js b/docs/_static/js/page.js new file mode 100644 index 000000000000..9054bf49ca04 --- /dev/null +++ b/docs/_static/js/page.js @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* Generate url tracking for each page */ +var protocol = location.protocol.concat("//"); +var host = protocol.concat(window.location.host); +var path = window.location.pathname; +var pathArr = path.split('/'); +var icon = ''; +var urlTracker = "
    • MXNet" + icon + "
    • "; + +// Check whether this is another version +var lastUrl = host; +var versionIDX = -1; +for (var i = 1; i < pathArr.length; ++i) { + lastUrl += '/' + pathArr[i]; + if(pathArr[i] == 'versions') { + versionIDX = i; + lastUrl += '/' + pathArr[i + 1]; + break; + } +} +if (versionIDX > 0) { + pathArr = pathArr.slice(versionIDX + 1, pathArr.length); + urlTracker = "
      • MXNet" + icon + "
      • "; +} +else lastUrl = host; + +for (var i = 1; i < pathArr.length; ++i) { + if (pathArr[i] == 'index.html' || pathArr[i].length == 0) continue; + if (pathArr[i].indexOf('#') != -1) pathArr[i] = pathArr[i].substring(0, pathArr[i].indexOf('#')); + lastUrl += '/' + pathArr[i]; + if (pathArr[i].endsWith('.html')) pathArr[i] = pathArr[i].substring(0, pathArr[i].length - 5); + if (i == pathArr.length - 1 || pathArr[i + 1].length == 0 || pathArr[i + 1] == 'index.html') { + if ( pathArr[i] == 'faq' ){ + pathArr[i] = "FAQ"; + } + urlTracker += "
      • " + pathArr[i].replace(/_/g, ' ') + "
      • "; + } + else { + // Check whether current folder has index.html. + // If it doesn't, disable the link. + $.ajax(lastUrl + '/index.html', { + type: "GET", + statusCode: { + 404: function (response) { + if (pathArr[i] == 'api') urlTracker += "
      • API" + icon + "
      • "; + else urlTracker += "
      • " + pathArr[i].replace(/_/g, ' ') + icon + "
      • "; + } + }, + success: function () { + item = pathArr[i] == 'ndarray' ? "NDArray" : pathArr[i]; + urlTracker += "
      • " + item.replace(/_/g, ' ') + "" + icon + "
      • "; + }, + async: false + }); + } +} +urlTracker += '
      '; +$('.page-tracker').append(urlTracker); + +/* Generate top download btn*/ +if ($('div.download-btn').length > 0) { + var topBtn = $('div.download-btn').clone(); + topBtn.addClass('download-btn-top'); + topBtn.insertAfter(".page-tracker"); +} + +/* Adjust footer position */ +var footerHeight = 252; +if ($('div.content-block').height() > $(window).height() - footerHeight) { + $('div.footer').css('position', 'relative'); +} diff --git a/docs/_static/js/search.js b/docs/_static/js/search.js index 9df9702225a2..e9c6e84410b0 100644 --- a/docs/_static/js/search.js +++ b/docs/_static/js/search.js @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + $(document).ready(function () { var searchForm = $("#search-input-wrap").children("form").first(); searchForm.append('
      '); @@ -16,4 +35,4 @@ $(document).ready(function () { $('#searchIcon span').addClass('glyphicon-search'); } }); -}); \ No newline at end of file +}); diff --git a/docs/_static/js/sidebar.js b/docs/_static/js/sidebar.js index 42607068e16e..890f8c36ad6b 100644 --- a/docs/_static/js/sidebar.js +++ b/docs/_static/js/sidebar.js @@ -1,6 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + /*Preprocess*/ var LANG = ['python', 'scala', 'r', 'julia', 'c++', 'perl']; -var TITLE_WITH_LANG = ['/get_started/', '/tutorials/', '/how_to/', '/architecture/']; +var TITLE_WITH_LANG = ['/get_started/', '/tutorials/', '/faq/', '/architecture/', '/community/']; for(var i = 0; i < LANG.length; ++i) { TITLE_WITH_LANG.push('/api/' + LANG[i] + '/'); } @@ -9,22 +28,18 @@ for(var i = 0; i < LANG.length; ++i) { var API_PAGE = ['python']; var isAPI = false; -function render_left_helper(toc, currentText) { +function render_left_helper(toc) { var lefttoc = toc; - var currentText = currentText, trailing = ' Documents'; - if (currentText.endsWith(trailing)) currentText = currentText.substring(0, currentText.length - trailing.length); - if (currentText == 'System') currentText = 'Architecture'; lefttoc.addClass('current'); $('.leftsidebar > .sphinxsidebarwrapper').children().remove(); $('.leftsidebar > .sphinxsidebarwrapper').append(lefttoc); - - $('.leftsidebar > .sphinxsidebarwrapper').prepend('

      Contents

      '); + addToggle('.leftsidebar'); $('.leftsidebar li a').click(function () { - $('.leftsidebar li a').css('color', 'black'); - $(this).css('color', '#337ab7'); + $('.leftsidebar li a').css('color', '#337ab7'); + $(this).css('color', 'black'); }); } @@ -38,6 +53,7 @@ function render_lefttoc() { $('.sphinxsidebar').css("visibility", "visible"); return; } + // If current page is not index page if (url.indexOf(indexTrailing) == -1) { for(var i = 0; i < TITLE_WITH_LANG.length; ++i) { var path = TITLE_WITH_LANG[i]; @@ -50,39 +66,36 @@ function render_lefttoc() { break; } } - var urlPath = 'https://' + window.location.host + version + path; + var protocol = location.protocol.concat("//"); + var urlPath = protocol + window.location.host + version + path; $.get(urlPath + indexTrailing, null, function(data) { - var currentText = $($.parseHTML(data)).find('.leftsidebar > .sphinxsidebarwrapper > ul.current > li.current > a').html(); - if (isAPI) { - render_left_helper($($.parseHTML(data)).find('#table-of-contents > div > ul'), currentText); - } - else { - render_left_helper($($.parseHTML(data)).find('.leftsidebar > .sphinxsidebarwrapper > ul.current > li.current > ul'), currentText); - var tocLink = $('.leftsidebar .sphinxsidebarwrapper .leaf a'); - var staticLink = 'http'; - tocLink.each(function () { - if (!$(this).attr('href').startsWith(staticLink)) { - $(this).attr('href', urlPath + $(this).attr('href')); - } - }); - } + var lastToc = $($.parseHTML(data)).find('.leftsidebar > .sphinxsidebarwrapper > ul.current > li.current > ul') + render_left_helper(lastToc); + var tocLink = $('.leftsidebar .sphinxsidebarwrapper .leaf a'); + var staticLink = 'http'; + tocLink.each(function () { + if (!$(this).attr('href').startsWith(staticLink)) { + $(this).attr('href', urlPath + $(this).attr('href')); + } + }); keepExpand(); $('.sphinxsidebar').css("visibility", "visible"); + if ($('div.sphinxsidebar').css('display') != 'none') $('.content').css('width', 'calc(100% - 300px)'); + 
else $('.content').css('width', '100%'); }) } } } else { - var currentText = $('.leftsidebar > .sphinxsidebarwrapper > ul.current > li.current > a').html(); - var toc = isAPI ? $('#table-of-contents > div > ul').clone() : $('.leftsidebar > .sphinxsidebarwrapper > ul.current > li.current > ul').clone(); - render_left_helper(toc, currentText); + var toc = $('.leftsidebar > .sphinxsidebarwrapper > ul.current > li.current > ul').clone(); + render_left_helper(toc); $('.sphinxsidebar').css("visibility", "visible"); } } /*Render contents inside page*/ function render_righttoc() { - var url = window.location.href, apiFlag = '/api/', indexTrailing = 'index.html'; + var url = window.location.href, indexTrailing = 'index.html'; var rightTocTitle = "Page Contents"; $("div.rightsidebar > div.sphinxsidebarwrapper > h3").children().remove(); @@ -91,8 +104,8 @@ function render_righttoc() { addToggle('.rightsidebar'); $('.rightsidebar li a').click(function () { - $('.rightsidebar li a').css('color', 'black'); - $(this).css('color', '#337ab7'); + $('.rightsidebar li a').css('color', '#337ab7'); + $(this).css('color', 'black'); }); if (url.indexOf(indexTrailing) != -1 || isAPI) { @@ -107,8 +120,8 @@ function scroll_righttoc() { for(var i = 1; i < links.length; ++i) { var divID = links.eq(i).attr('href'); if ($(divID).offset().top - $(window).scrollTop() > navbarHeight) { - $('.rightsidebar a').css('color', 'black'); - links.eq(i - 1).css('color', '#337ab7'); + $('.rightsidebar a').css('color', '#337ab7'); + links.eq(i - 1).css('color', 'black'); if (!links.eq(i - 1).parent().hasClass('leaf')) { links.eq(i - 1).parent().removeClass('closed'); links.eq(i - 1).parent().addClass('opened'); @@ -173,7 +186,7 @@ function autoExpand(elem) { /*Keep toc expansion while redirecting*/ function keepExpand() { var url = window.location.href, currentEntry; - var entryList = isAPI ? 
$('.leftsidebar li') : $('.sphinxsidebar li'); + var entryList = $('.sphinxsidebar li'); for(var i = entryList.length - 1; i >= 0; --i) { var entryURL = entryList.eq(i).find('a').first().attr('href'); if (entryURL != '#' && url.indexOf(entryURL) != -1) { @@ -182,12 +195,12 @@ function keepExpand() { } } + //Merge right toc into left toc for API pages since they are quite long if (isAPI) { var rootEntry = currentEntry; - if (rootEntry.parent().parent().is('li')) rootEntry = rootEntry.parent().parent(); - rootEntry.children("ul").first().remove(); rootEntry.append($('.rightsidebar .sphinxsidebarwrapper > ul > li > ul').clone()); - var allEntry = $(".leftsidebar div.sphinxsidebarwrapper li"); + rootEntry.addClass('closed').removeClass('leaf'); + var allEntry = $(".leftsidebar div.sphinxsidebarwrapper li.toctree-l2 li"); allEntry.each(function () { var anchor = $(this).children("a").first(); anchor.click(function () { @@ -201,8 +214,8 @@ function keepExpand() { } }); $('.leftsidebar li a').click(function () { - $('.leftsidebar li a').css('color', 'black'); - $(this).css('color', '#337ab7'); + $('.leftsidebar li a').css('color', '#337ab7'); + $(this).css('color', 'black'); }); } currentEntry.find('a').first().css('color', '#337ab7'); @@ -218,9 +231,9 @@ function keepExpand() { $(document).ready(function () { var url = window.location.href, searchFlag = 'search.html'; + var showRightToc = false; try { - if(url.indexOf('/get_started/') != -1) return; - if (url.indexOf(searchFlag) == -1) { + if (url.indexOf('/get_started/') == -1 && url.indexOf(searchFlag) == -1) { for(var i = 0; i < API_PAGE.length; ++i) { if (url.indexOf('/api/' + API_PAGE[i]) != -1) { isAPI = true; @@ -230,13 +243,31 @@ $(document).ready(function () { render_righttoc(); if ($('.leftsidebar').length) render_lefttoc(); } - - if(url.indexOf('/api/') != -1) return; - $(window).scroll(function () { - scroll_righttoc(); - }); + if ($('div.sphinxsidebar').css('visibility') == 'hidden') 
$('.content').css('width', '100%'); + if (url.indexOf('/api/') != -1) return; + if (url.indexOf('/install/') != -1) { + $('div.sphinxsidebar').hide(); + $('.content').css('width', '100%'); + } + if (url.indexOf('/gluon/index.html') != -1) { + $('div.sphinxsidebar').hide(); + $('.content').css('width', '100%'); + } + if (showRightToc) { + $(window).scroll(function () { + scroll_righttoc(); + }); + } + else { + $('.rightsidebar').hide(); + } + // move right toc to left if current left toc is empty + if ($('.leftsidebar > .sphinxsidebarwrapper').children().length == 0) { + $('.leftsidebar > .sphinxsidebarwrapper').append($('.rightsidebar > .sphinxsidebarwrapper > ul')); + } } catch(err) { + if ($('div.sphinxsidebar').css('visibility') == 'hidden') $('.content').css('width', '100%'); return; } -}); \ No newline at end of file +}); diff --git a/docs/_static/mxnet-theme/footer.html b/docs/_static/mxnet-theme/footer.html index 45ba457a0722..76d694e8b34f 100644 --- a/docs/_static/mxnet-theme/footer.html +++ b/docs/_static/mxnet-theme/footer.html @@ -1,5 +1,35 @@ -
      + + diff --git a/docs/_static/mxnet-theme/index.html b/docs/_static/mxnet-theme/index.html index e381428758c0..40bd6dff5edc 100644 --- a/docs/_static/mxnet-theme/index.html +++ b/docs/_static/mxnet-theme/index.html @@ -1,13 +1,40 @@ + +
      - - -
      - Install +
      @@ -24,8 +51,8 @@

      Introducing - Gluon

      Learn More
      -

      MXNet 0.10.0 Released

      -

      We're excited to announce the release of MXNet 0.10.0! Check out the release notes for latest updates.

      +

      MXNet 0.12.0 Released

      +

      We're excited to announce the release of MXNet 0.12.0! Check out the release notes for latest updates.

      Learn More
      @@ -41,96 +68,30 @@

      MXNet Joining Apache

      -
      +
      + +

      Model Zoo

      +

      Off the shelf pre-trained models. Fast implementations of many state-of-the-art models.

      +
      + Model zoo +
      +
      +

      Examples

      -

      Explore projects from simple demos to state-of-the-art research

      +

      Explore projects from simple demos, examples, tutorials to state-of-the-art research.

      -
      - -

      Model Zoo

      -

      Off the shelf pre-trained models

      +
      + +

      Tutorials

      +

      These tutorials introduce a few fundamental concepts in deep learning and how to implement them in MXNet.

      - -
      -
      -
      -

      - MXNet is developed by collaborators from multiple universities and - companies. We sincerely thank the following organizations for supporting - MXNet and sponsoring its major developers (alphabetical order). -

      -
      - -
      - -
      - -
      - -
      - -
      - -
      - -
      - -
      - -
      - -
      - -
      - -
      - -
      - -
      - -
      - -
      - -
      - -
      - -
      - -
      - -
      - -
      - -
      - -
      - -
      - -
      - -
      -
      -
      -
      - -
      - -

      - Apache MXNet is an effort undergoing incubation at The Apache Software Foundation (ASF), sponsored by the Apache Incubator. Incubation is required of all newly accepted projects until a further review indicates that the infrastructure, communications, and decision making process have stabilized in a manner consistent with other successful ASF projects. While incubation status is not necessarily a reflection of the completeness or stability of the code, it does indicate that the project has yet to be fully endorsed by the ASF. -

      -
      \ No newline at end of file diff --git a/docs/_static/mxnet-theme/layout.html b/docs/_static/mxnet-theme/layout.html index c312181a1476..3d5df27077d5 100644 --- a/docs/_static/mxnet-theme/layout.html +++ b/docs/_static/mxnet-theme/layout.html @@ -1,3 +1,22 @@ + + {%- block doctype -%} {%- endblock %} @@ -148,10 +167,15 @@ - + +
      {%- include "navbar.html" %} + {% if pagename != 'index' %} +
      +
      {% block body %} {% endblock %} - {%- include "footer.html" %}
      - {{ sidebar() }} +
      + {{ sidebar() }}
      + {%- include "footer.html" %} {%- else %} {%- include "index.html" %} {%- include "footer.html" %} {%- endif %} +
      @@ -176,6 +203,7 @@ +