diff --git a/.travis.yml b/.travis.yml
index b0aa26c1a3a1..485faadee277 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -34,6 +34,7 @@ script:
   # Temporarily disable travis build due to travis constantly time out, tracked in
   # https://github.com/apache/incubator-mxnet/issues/16535:
   - export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
+  - export MXNET_SUBGRAPH_VERBOSE=0
   - mv make/osx.mk config.mk
   # - make -j 2
diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644
index 000000000000..bbb4505499c1
--- /dev/null
+++ b/SECURITY.md
@@ -0,0 +1,25 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# Security Policy
+
+## Reporting a Vulnerability
+The Apache Software Foundation takes a very active stance in eliminating security problems and denial of service attacks against its products.
+
+We strongly encourage folks to report such problems to our private security mailing list first, before disclosing them in a public forum.
+
+For instructions on how to report a security vulnerability, please consult our [security guide](https://mxnet.apache.org/api/faq/security).
diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh
index 0112d6bb1704..c2acc0f40d7d 100755
--- a/ci/docker/runtime_functions.sh
+++ b/ci/docker/runtime_functions.sh
@@ -1007,6 +1007,7 @@ cd_unittest_ubuntu() {
     export PYTHONPATH=./python/
     export MXNET_MKLDNN_DEBUG=1 # Ignored if not present
     export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
+    export MXNET_SUBGRAPH_VERBOSE=0
     export MXNET_ENABLE_CYTHON=0
     export CD_JOB=1 # signal this is a CD run so any unnecessary tests can be skipped
@@ -1049,6 +1050,7 @@ unittest_ubuntu_python2_cpu_cython() {
     export PYTHONPATH=./python/
     export MXNET_MKLDNN_DEBUG=1
     export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
+    export MXNET_SUBGRAPH_VERBOSE=0
     export MXNET_ENABLE_CYTHON=1
     export MXNET_ENFORCE_CYTHON=1
     check_cython 2
@@ -1062,6 +1064,7 @@ unittest_ubuntu_python2_cpu() {
     export PYTHONPATH=./python/
     export MXNET_MKLDNN_DEBUG=0
     export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
+    export MXNET_SUBGRAPH_VERBOSE=0
     export MXNET_ENABLE_CYTHON=0
     nosetests-2.7 $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_unittest.xml --verbose tests/python/unittest
     nosetests-2.7 $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_train.xml --verbose tests/python/train
@@ -1073,6 +1076,7 @@ unittest_ubuntu_python3_cpu() {
     export PYTHONPATH=./python/
     export MXNET_MKLDNN_DEBUG=0 # Ignored if not present
     export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
+    export MXNET_SUBGRAPH_VERBOSE=0
     export MXNET_ENABLE_CYTHON=0
     nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_unittest.xml --verbose tests/python/unittest
     nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_quantization.xml --verbose tests/python/quantization
@@ -1083,6 +1087,7 @@ unittest_ubuntu_python3_cpu_mkldnn() {
     export PYTHONPATH=./python/
     export MXNET_MKLDNN_DEBUG=0 # Ignored if not present
     export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
+    export MXNET_SUBGRAPH_VERBOSE=0
     export MXNET_ENABLE_CYTHON=0
     nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_unittest.xml --verbose tests/python/unittest
     nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_mkl.xml --verbose tests/python/mkl
@@ -1093,6 +1098,7 @@ unittest_ubuntu_python2_gpu() {
     export PYTHONPATH=./python/
     export MXNET_MKLDNN_DEBUG=0 # Ignored if not present
     export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
+    export MXNET_SUBGRAPH_VERBOSE=0
     export CUDNN_VERSION=${CUDNN_VERSION:-7.0.3}
     nosetests-2.7 $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_gpu.xml --verbose tests/python/gpu
 }
@@ -1102,6 +1108,7 @@ unittest_ubuntu_python3_gpu() {
     export PYTHONPATH=./python/
     export MXNET_MKLDNN_DEBUG=0 # Ignored if not present
     export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
+    export MXNET_SUBGRAPH_VERBOSE=0
     export CUDNN_VERSION=${CUDNN_VERSION:-7.0.3}
     export MXNET_ENABLE_CYTHON=0
     nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_gpu.xml --verbose tests/python/gpu
@@ -1112,6 +1119,7 @@ unittest_ubuntu_python3_gpu_cython() {
     export PYTHONPATH=./python/
     export MXNET_MKLDNN_DEBUG=1 # Ignored if not present
     export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
+    export MXNET_SUBGRAPH_VERBOSE=0
     export CUDNN_VERSION=${CUDNN_VERSION:-7.0.3}
     export MXNET_ENABLE_CYTHON=1
     export MXNET_ENFORCE_CYTHON=1
@@ -1123,6 +1131,7 @@ unittest_ubuntu_python3_gpu_nocudnn() {
     set -ex
     export PYTHONPATH=./python/
     export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
+    export MXNET_SUBGRAPH_VERBOSE=0
     export CUDNN_OFF_TEST_ONLY=true
     export MXNET_ENABLE_CYTHON=0
     nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_gpu.xml --verbose tests/python/gpu
@@ -1132,6 +1141,7 @@ unittest_ubuntu_tensorrt_gpu() {
     set -ex
     export PYTHONPATH=./python/
     export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
+    export MXNET_SUBGRAPH_VERBOSE=0
     export LD_LIBRARY_PATH=/work/mxnet/lib:$LD_LIBRARY_PATH
     export CUDNN_VERSION=${CUDNN_VERSION:-7.0.3}
     export MXNET_ENABLE_CYTHON=0
@@ -1146,6 +1156,7 @@ unittest_ubuntu_python2_quantization_gpu() {
     export PYTHONPATH=./python/
     export MXNET_MKLDNN_DEBUG=0 # Ignored if not present
     export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
+    export MXNET_SUBGRAPH_VERBOSE=0
     export CUDNN_VERSION=${CUDNN_VERSION:-7.0.3}
     export MXNET_ENABLE_CYTHON=0
     nosetests-2.7 $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_quantization_gpu.xml --verbose tests/python/quantization_gpu
@@ -1158,6 +1169,7 @@ unittest_ubuntu_python3_quantization_gpu() {
     export PYTHONPATH=./python/
     export MXNET_MKLDNN_DEBUG=0 # Ignored if not present
     export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
+    export MXNET_SUBGRAPH_VERBOSE=0
     export CUDNN_VERSION=${CUDNN_VERSION:-7.0.3}
     export MXNET_ENABLE_CYTHON=0
     nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_quantization_gpu.xml --verbose tests/python/quantization_gpu
@@ -1320,6 +1332,7 @@ integrationtest_ubuntu_gpu_python() {
     set -ex
     export PYTHONPATH=./python/
     export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
+    export MXNET_SUBGRAPH_VERBOSE=0
     python example/image-classification/test_score.py
 }
@@ -1348,6 +1361,7 @@ integrationtest_ubuntu_cpu_dist_kvstore() {
     pushd .
     export PYTHONPATH=./python/
     export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
+    export MXNET_SUBGRAPH_VERBOSE=0
     export MXNET_USE_OPERATOR_TUNING=0
     cd tests/nightly/
     ../../tools/launch.py -n 7 --launcher local python dist_sync_kvstore.py --type=gluon_step_cpu
@@ -1382,6 +1396,7 @@ integrationtest_ubuntu_gpu_dist_kvstore() {
     pushd .
     export PYTHONPATH=./python/
     export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
+    export MXNET_SUBGRAPH_VERBOSE=0
     cd tests/nightly/
     ../../tools/launch.py -n 4 --launcher local python dist_device_sync_kvstore.py
     ../../tools/launch.py -n 4 --launcher local python dist_sync_kvstore.py --type=init_gpu
@@ -1569,6 +1584,7 @@ nightly_tutorial_test_ubuntu_python3_gpu() {
     export MXNET_DOCS_BUILD_MXNET=0
     make html
     export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
+    export MXNET_SUBGRAPH_VERBOSE=0
     export PYTHONPATH=/work/mxnet/python/
     export MXNET_TUTORIAL_TEST_KERNEL=python3
     cd /work/mxnet/tests/tutorials
@@ -1582,6 +1598,7 @@ nightly_tutorial_test_ubuntu_python2_gpu() {
     export MXNET_DOCS_BUILD_MXNET=0
     make html
     export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
+    export MXNET_SUBGRAPH_VERBOSE=0
     export PYTHONPATH=/work/mxnet/python/
     export MXNET_TUTORIAL_TEST_KERNEL=python2
     cd /work/mxnet/tests/tutorials
@@ -1975,7 +1992,7 @@ cd_package_pypi() {
     popd
 }

-# Sanity checks wheel file 
+# Sanity checks wheel file
 cd_integration_test_pypi() {
     set -ex
     local python_cmd=${1:?"This function requires a python command as the first argument"}
diff --git a/ci/windows/test_py2_cpu.ps1 b/ci/windows/test_py2_cpu.ps1
index df9b15ba1ec3..c39d1fa45328 100644
--- a/ci/windows/test_py2_cpu.ps1
+++ b/ci/windows/test_py2_cpu.ps1
@@ -20,6 +20,7 @@
 $env:MXNET_LIBRARY_PATH=join-path $pwd.Path windows_package\lib\libmxnet.dll
 $env:PYTHONPATH=join-path $pwd.Path windows_package\python
 $env:MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
+$env:MXNET_SUBGRAPH_VERBOSE=0
 $env:MXNET_HOME=[io.path]::combine($PSScriptRoot, 'mxnet_home')
 C:\Python27\Scripts\pip install -r tests\requirements.txt
diff --git a/ci/windows/test_py2_gpu.ps1 b/ci/windows/test_py2_gpu.ps1
index f2974ff6f7b6..b2ea62fc7cd4 100644
--- a/ci/windows/test_py2_gpu.ps1
+++ b/ci/windows/test_py2_gpu.ps1
@@ -20,6 +20,7 @@
 $env:MXNET_LIBRARY_PATH=join-path $pwd.Path windows_package\lib\libmxnet.dll
 $env:PYTHONPATH=join-path $pwd.Path windows_package\python
 $env:MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
+$env:MXNET_SUBGRAPH_VERBOSE=0
 $env:MXNET_HOME=[io.path]::combine($PSScriptRoot, 'mxnet_home')
 C:\Python27\Scripts\pip install -r tests\requirements.txt
diff --git a/ci/windows/test_py3_cpu.ps1 b/ci/windows/test_py3_cpu.ps1
index 900bfd161cd0..1e09b5c98ce1 100644
--- a/ci/windows/test_py3_cpu.ps1
+++ b/ci/windows/test_py3_cpu.ps1
@@ -20,6 +20,7 @@
 $env:MXNET_LIBRARY_PATH=join-path $pwd.Path windows_package\lib\libmxnet.dll
 $env:PYTHONPATH=join-path $pwd.Path windows_package\python
 $env:MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
+$env:MXNET_SUBGRAPH_VERBOSE=0
 $env:MXNET_HOME=[io.path]::combine($PSScriptRoot, 'mxnet_home')
 C:\Python37\Scripts\pip install -r tests\requirements.txt
diff --git a/ci/windows/test_py3_gpu.ps1 b/ci/windows/test_py3_gpu.ps1
index b6e951b291fb..9bf7d04d8a88 100644
--- a/ci/windows/test_py3_gpu.ps1
+++ b/ci/windows/test_py3_gpu.ps1
@@ -20,6 +20,7 @@
 $env:MXNET_LIBRARY_PATH=join-path $pwd.Path windows_package\lib\libmxnet.dll
 $env:PYTHONPATH=join-path $pwd.Path windows_package\python
 $env:MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
+$env:MXNET_SUBGRAPH_VERBOSE=0
 $env:MXNET_HOME=[io.path]::combine($PSScriptRoot, 'mxnet_home')
 C:\Python37\Scripts\pip install -r tests\requirements.txt
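Every CI entry point above now exports the new `MXNET_SUBGRAPH_VERBOSE` switch next to the existing storage-fallback one. As a quick illustration (not part of the patch), a local test run can mirror that environment; the only subtle point is to set the variables before `mxnet` is imported so the backend sees them:

```python
# Sketch only: reproduce the CI logging environment locally.
import os

os.environ["MXNET_STORAGE_FALLBACK_LOG_VERBOSE"] = "0"  # silence storage-fallback warnings
os.environ["MXNET_SUBGRAPH_VERBOSE"] = "0"              # silence subgraph-partitioning logs

import mxnet as mx  # import after setting the variables so the library picks them up
```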
diff --git a/cpp-package/README.md b/cpp-package/README.md
index 05fb506db42b..77ff0ee36e80 100644
--- a/cpp-package/README.md
+++ b/cpp-package/README.md
@@ -55,7 +55,7 @@ In order to consume the C++ API please follow the steps below.

 ## Tutorial

-A basic tutorial can be found at .
+A basic tutorial can be found at .

 ## Examples
diff --git a/docs/python_docs/python/api/gluon/data/index.rst b/docs/python_docs/python/api/gluon/data/index.rst
new file mode 100644
index 000000000000..f9e8a21e69d2
--- /dev/null
+++ b/docs/python_docs/python/api/gluon/data/index.rst
@@ -0,0 +1,63 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+
+gluon.data
+==========
+
+.. automodule:: mxnet.gluon.data
+
+Datasets
+--------
+
+.. autosummary::
+
+    Dataset
+    ArrayDataset
+    RecordFileDataset
+    SimpleDataset
+
+Sampling
+--------
+
+.. autosummary::
+
+    Sampler
+    SequentialSampler
+    RandomSampler
+    BatchSampler
+
+DataLoader
+----------
+
+.. autosummary::
+
+    DataLoader
+
+
+API Reference
+-------------
+.. automodule:: mxnet.gluon.data
+    :members:
+    :imported-members:
+    :autosummary:
+
+.. toctree::
+   :hidden:
+   :maxdepth: 2
+   :glob:
+
+   */index
\ No newline at end of file
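Since this new page only lists class names, a minimal usage sketch of the `Dataset`/`DataLoader` flow may help orient readers; it is not part of the patch, and the shapes and batch size are illustrative:

```python
import mxnet as mx
from mxnet.gluon.data import ArrayDataset, DataLoader

# pair features with labels, then draw shuffled mini-batches
features = mx.nd.random.uniform(shape=(10, 3))
labels = mx.nd.arange(10)
dataset = ArrayDataset(features, labels)
loader = DataLoader(dataset, batch_size=4, shuffle=True)

for X, y in loader:
    print(X.shape, y.shape)  # (4, 3) (4,) on full batches, (2, 3) (2,) on the last
```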
diff --git a/docs/python_docs/python/api/gluon/data/vision/datasets/index.rst b/docs/python_docs/python/api/gluon/data/vision/datasets/index.rst
new file mode 100644
index 000000000000..6b007526607a
--- /dev/null
+++ b/docs/python_docs/python/api/gluon/data/vision/datasets/index.rst
@@ -0,0 +1,26 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+
+vision.datasets
+===============
+
+Gluon provides pre-defined vision dataset functions in the :py:mod:`mxnet.gluon.data.vision.datasets`
+module.
+
+.. automodule:: mxnet.gluon.data.vision.datasets
+    :members:
+    :autosummary:
diff --git a/docs/python_docs/python/api/gluon/data/vision/index.rst b/docs/python_docs/python/api/gluon/data/vision/index.rst
new file mode 100644
index 000000000000..2731b5f4245a
--- /dev/null
+++ b/docs/python_docs/python/api/gluon/data/vision/index.rst
@@ -0,0 +1,53 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+
+data.vision
+============
+
+.. automodule:: mxnet.gluon.data.vision
+
+Datasets
+^^^^^^^^
+
+.. autosummary::
+   :nosignatures:
+
+   mxnet.gluon.data.vision.datasets
+
+
+Data transformations
+^^^^^^^^^^^^^^^^^^^^
+
+
+.. autosummary::
+   :nosignatures:
+
+   mxnet.gluon.data.vision.transforms
+
+
+API Reference
+-------------
+.. automodule:: mxnet.gluon.data.vision
+    :members:
+    :autosummary:
+
+.. toctree::
+   :hidden:
+   :maxdepth: 2
+   :glob:
+
+   */index
\ No newline at end of file
diff --git a/docs/python_docs/python/api/gluon/data/vision/transforms/index.rst b/docs/python_docs/python/api/gluon/data/vision/transforms/index.rst
new file mode 100644
index 000000000000..60d975d87aff
--- /dev/null
+++ b/docs/python_docs/python/api/gluon/data/vision/transforms/index.rst
@@ -0,0 +1,48 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+
+vision.transforms
+=================
+
+Gluon provides pre-defined vision transformation and data augmentation functions in the :py:mod:`mxnet.gluon.data.vision.transforms`
+module.
+
+.. currentmodule:: mxnet.gluon.data.vision
+
+.. autosummary::
+   :nosignatures:
+
+   transforms.Compose
+   transforms.Cast
+   transforms.ToTensor
+   transforms.Normalize
+   transforms.RandomResizedCrop
+   transforms.CenterCrop
+   transforms.Resize
+   transforms.RandomFlipLeftRight
+   transforms.RandomFlipTopBottom
+   transforms.RandomBrightness
+   transforms.RandomContrast
+   transforms.RandomSaturation
+   transforms.RandomHue
+   transforms.RandomColorJitter
+   transforms.RandomLighting
+
+API Reference
+-------------
+.. automodule:: mxnet.gluon.data.vision.transforms
+    :members:
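A small sketch of how the transforms listed above compose in practice (illustrative values, not part of the patch; the ImageNet mean/std constants are the conventional ones):

```python
from mxnet import nd
from mxnet.gluon.data.vision import transforms

aug = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomFlipLeftRight(),
    transforms.ToTensor(),   # HWC uint8 -> CHW float32 in [0, 1]
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

img = nd.random.uniform(0, 255, shape=(256, 256, 3)).astype('uint8')
print(aug(img).shape)        # (3, 224, 224)
```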
diff --git a/docs/python_docs/python/api/mxnet/log/index.rst b/docs/python_docs/python/api/mxnet/log/index.rst
new file mode 100644
index 000000000000..fd4d8788c28a
--- /dev/null
+++ b/docs/python_docs/python/api/mxnet/log/index.rst
@@ -0,0 +1,23 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+
+mxnet.log
+=========
+
+.. automodule:: mxnet.log
+    :members:
+    :autosummary:
\ No newline at end of file
diff --git a/docs/python_docs/python/api/mxnet/model/index.rst b/docs/python_docs/python/api/mxnet/model/index.rst
new file mode 100644
index 000000000000..69bcddce6bc1
--- /dev/null
+++ b/docs/python_docs/python/api/mxnet/model/index.rst
@@ -0,0 +1,23 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+
+mxnet.model
+===========
+
+.. automodule:: mxnet.model
+    :members:
+    :autosummary:
diff --git a/docs/python_docs/python/tutorials/getting-started/crash-course/5-predict.md b/docs/python_docs/python/tutorials/getting-started/crash-course/5-predict.md
index 7a7738d8df1b..9afe95b58403 100644
--- a/docs/python_docs/python/tutorials/getting-started/crash-course/5-predict.md
+++ b/docs/python_docs/python/tutorials/getting-started/crash-course/5-predict.md
@@ -21,7 +21,7 @@ A saved model can be used in multiple places, such as to continue training, to f

 ## Prerequisites

-Please run the [previous tutorial](train.md) to train the network and save its parameters to file. You will need this file to run the following steps.
+Please run the [previous tutorial](4-train.html) to train the network and save its parameters to file. You will need this file to run the following steps.

 ```{.python .input n=1}
 from mxnet import nd
diff --git a/docs/python_docs/python/tutorials/getting-started/crash-course/6-use_gpus.md b/docs/python_docs/python/tutorials/getting-started/crash-course/6-use_gpus.md
index b78c38ab7077..a0788ba7df2d 100644
--- a/docs/python_docs/python/tutorials/getting-started/crash-course/6-use_gpus.md
+++ b/docs/python_docs/python/tutorials/getting-started/crash-course/6-use_gpus.md
@@ -99,7 +99,7 @@ net(x)

 Finally, we show how to use multiple GPUs to jointly train a neural network through data parallelism. Let's assume there are *n* GPUs. We split each data batch into *n* parts, and then each GPU will run the forward and backward passes using one part of the data.

-Let's first copy the data definitions and the transform function from the [previous tutorial](predict.md).
+Let's first copy the data definitions and the transform function from the [previous tutorial](5-predict.html).

 ```{.python .input}
 batch_size = 256
diff --git a/docs/python_docs/python/tutorials/getting-started/gluon_from_experiment_to_deployment.md b/docs/python_docs/python/tutorials/getting-started/gluon_from_experiment_to_deployment.md
index 8d2c4e100c76..b1f65e682263 100644
--- a/docs/python_docs/python/tutorials/getting-started/gluon_from_experiment_to_deployment.md
+++ b/docs/python_docs/python/tutorials/getting-started/gluon_from_experiment_to_deployment.md
@@ -20,7 +20,7 @@
 ## Overview
 MXNet Gluon API comes with a lot of great features, and it can provide you with everything you need: from experimentation to deploying the model. In this tutorial, we will walk you through a common use case on how to build a model using gluon, train it on your data, and deploy it for inference.
-This tutorial covers training and inference in Python, please continue to [C++ inference part](https://mxnet.apache.org/versions/master/tutorials/c++/mxnet_cpp_inference_tutorial.html) after you finish.
+This tutorial covers training and inference in Python; please continue to the [C++ inference part](/api/cpp/docs/tutorials/cpp_inference) after you finish.

 Let's say you need to build a service that provides flower species recognition. A common problem is that you don't have enough data to train a good model. In such cases, a technique called Transfer Learning can be used to make a more robust model. In Transfer Learning we make use of a pre-trained model that solves a related task, and was trained on a very large standard dataset, such as ImageNet. ImageNet is from a different domain, but we can utilize the knowledge in this pre-trained model to perform the new task at hand.
@@ -77,7 +77,7 @@ from mxnet.gluon.data.vision import transforms
 from mxnet.gluon.model_zoo.vision import resnet50_v2
 ```

-Next, we define the hyper-parameters that we will use for fine-tuning. We will use the [MXNet learning rate scheduler](../packages/gluon/training/learning_rates/learning_rate_schedules.html) to adjust learning rates during training.
+Next, we define the hyper-parameters that we will use for fine-tuning. We will use the [MXNet learning rate scheduler](/api/python/docs/tutorials/packages/gluon/training/learning_rates/learning_rate_schedules.html) to adjust learning rates during training.
 Here we set the `epochs` to 1 for quick demonstration; please change it to 40 for actual training.

 ```python
@@ -161,7 +161,7 @@ test_data = gluon.data.DataLoader(
 We will use pre-trained ResNet50_v2 model which was pre-trained on the [ImageNet Dataset](http://www.image-net.org/) with 1000 classes. To match the classes in the Flower dataset, we must redefine the last softmax (output) layer to be 102, then initialize the parameters.

-Before we go to training, one unique Gluon feature you should be aware of is hybridization. It allows you to convert your imperative code to a static symbolic graph, which is much more efficient to execute. There are two main benefits of hybridizing your model: better performance and easier serialization for deployment. The best part is that it's as simple as just calling `net.hybridize()`. To know more about Gluon hybridization, please follow the [hybridization tutorial](https://mxnet.apache.org/tutorials/gluon/hybrid.html).
+Before we go to training, one unique Gluon feature you should be aware of is hybridization. It allows you to convert your imperative code to a static symbolic graph, which is much more efficient to execute. There are two main benefits of hybridizing your model: better performance and easier serialization for deployment. The best part is that it's as simple as just calling `net.hybridize()`. To know more about Gluon hybridization, please follow the [hybridization tutorial](/api/python/docs/tutorials/packages/gluon/blocks/hybridize.html).
@@ -265,7 +265,7 @@ finetune_net.export("flower-recognition", epoch=epochs)
 ## Load the model and run inference using the MXNet Module API

 MXNet provides various useful tools and interfaces for deploying your model for inference. For example, you can use [MXNet Model Server](https://github.com/awslabs/mxnet-model-server) to start a service and host your trained model easily.
-Besides that, you can also use MXNet's different language APIs to integrate your model with your existing service. We provide [Python](https://mxnet.apache.org/api/python/module/module.html), [Java](https://mxnet.apache.org/api/java/index.html), [Scala](https://mxnet.apache.org/api/scala/index.html), and [C++](https://mxnet.apache.org/api/c++/index.html) APIs.
+Besides that, you can also use MXNet's different language APIs to integrate your model with your existing service. We provide [Python](/api/python.html), [Java](/api/java.html), [Scala](/api/scala.html), and [C++](/api/cpp) APIs.

 Here we will briefly introduce how to run inference using Module API in Python. There is more detailed explanation available in the [Predict Image Tutorial](https://mxnet.apache.org/tutorials/python/predict_image.html). In general, prediction consists of the following steps:
@@ -315,7 +315,7 @@ You can continue to the [next tutorial](https://mxnet.apache.org/versions/master
 You can also find more ways to run inference and deploy your models here:
 1. [Java Inference examples](https://github.com/apache/incubator-mxnet/tree/master/scala-package/examples/src/main/java/org/apache/mxnetexamples/javaapi/infer)
-2. [Scala Inference examples](https://mxnet.apache.org/tutorials/scala/)
+2. [Scala Inference examples](/api/scala/docs/tutorials/infer)
 4. [MXNet Model Server Examples](https://github.com/awslabs/mxnet-model-server/tree/master/examples)

 ## References
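For readers following the hybridization paragraph in the hunk above, a minimal sketch of the hybridize-then-export flow (layer sizes are illustrative; `export` mirrors the tutorial's `finetune_net.export` call, and the net must run once after `hybridize()` before exporting):

```python
from mxnet import nd
from mxnet.gluon import nn

net = nn.HybridSequential()
net.add(nn.Dense(256, activation='relu'), nn.Dense(102))
net.initialize()
net.hybridize()                      # subsequent calls run through a cached static graph
out = net(nd.random.uniform(shape=(1, 512)))
net.export("flower-recognition")     # writes -symbol.json and -0000.params for deployment
```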
diff --git a/docs/python_docs/python/tutorials/getting-started/to-mxnet/pytorch.md b/docs/python_docs/python/tutorials/getting-started/to-mxnet/pytorch.md
index d7720bac4348..1ab490fbaa42 100644
--- a/docs/python_docs/python/tutorials/getting-started/to-mxnet/pytorch.md
+++ b/docs/python_docs/python/tutorials/getting-started/to-mxnet/pytorch.md
@@ -164,7 +164,7 @@ mx_trainer = gluon.Trainer(mx_net.collect_params(),
                            'sgd', {'learning_rate': 0.1})
 ```

-The code difference between frameworks is small. The main difference is that in Apache MXNet we use [Trainer](https://mxnet.apache.org/api/python/docs/api/gluon/mxnet.gluon.Trainer.html) class, which accepts optimization algorithm as an argument. We also use [.collect_params()](/api/python/docs/api/gluon/_autogen/mxnet.gluon.nn.Block.collect_params.html) method to get parameters of the network.
+The code difference between frameworks is small. The main difference is that in Apache MXNet we use the [Trainer](/api/python/docs/api/gluon/trainer.html) class, which accepts an optimization algorithm as an argument. We also use the [.collect_params()](/api/python/docs/api/gluon/block.html#mxnet.gluon.Block.collect_params) method to get parameters of the network.

 ### 4. Training
@@ -212,13 +212,13 @@ Some of the differences in Apache MXNet when compared to PyTorch are as follows:

 * In Apache MXNet, you don't need to flatten the 4-D input into 2-D when feeding the data into forward pass.

-* In Apache MXNet, you need to perform the calculation within the [autograd.record()](/api/python/docs/api/gluon-related/_autogen/mxnet.autograd.record.html) scope so that it can be automatically differentiated in the backward pass.
+* In Apache MXNet, you need to perform the calculation within the [autograd.record()](/api/python/docs/api/autograd/index.html?autograd%20record#mxnet.autograd.record) scope so that it can be automatically differentiated in the backward pass.

 * It is not necessary to clear the gradient every time as with PyTorch's `trainer.zero_grad()` because by default the new gradient is written in, not accumulated.

-* You need to specify the update step size (usually batch size) when performing [step()](/api/python/docs/api/gluon/_autogen/mxnet.gluon.Trainer.step.html) on the trainer.
+* You need to specify the update step size (usually batch size) when performing [step()](/api/python/docs/api/gluon/trainer.html?#mxnet.gluon.Trainer.step) on the trainer.

-* You need to call [.asscalar()](/api/python/docs/api/ndarray/_autogen/mxnet.ndarray.NDArray.asscalar.html) to turn a multidimensional array into a scalar.
+* You need to call [.asscalar()](/api/python/docs/api/ndarray/ndarray.html?#mxnet.ndarray.NDArray.asscalar) to turn a multidimensional array into a scalar.

 * In this sample, Apache MXNet is twice as fast as PyTorch. Though you need to be cautious with such toy comparisons.

@@ -230,9 +230,9 @@ As we saw above, Apache MXNet Gluon API and PyTorch have many similarities. The

 While Apache MXNet Gluon API is very similar to PyTorch, there is some extra functionality that can make your code even faster.

-* Check out [Hybridize tutorial](/api/python/docs/guide/packages/gluon/hybridize.html) to learn how to write imperative code which can be converted to symbolic one.
+* Check out the [Hybridize tutorial](/api/python/docs/tutorials/packages/gluon/blocks/hybridize.html) to learn how to write imperative code which can be converted to a symbolic one.

-* Also, check out how to extend Apache MXNet with your own [custom layers](/api/python/docs/guide/extend/custom_layer.html).
+* Also, check out how to extend Apache MXNet with your own [custom layers](/api/python/docs/tutorials/packages/gluon/blocks/custom-layer.html?custom_layers).

 ## Appendix
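The bullet list in the pytorch.md hunk above is easier to follow next to code. A compact sketch (toy network and batch size, not from the tutorial) touching each point — `autograd.record()`, the implicit gradient overwrite, `step(batch_size)`, and `asscalar()`:

```python
import mxnet as mx
from mxnet import autograd, gluon
from mxnet.gluon import nn

net = nn.Dense(1)
net.initialize()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})

x = mx.nd.random.uniform(shape=(4, 2))
with autograd.record():          # computations to differentiate go in this scope
    loss = (net(x) ** 2).mean()
loss.backward()                  # no zero_grad(): gradients are overwritten by default
trainer.step(4)                  # update step size = batch size
print(loss.asscalar())           # NDArray -> Python scalar
```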
diff --git a/docs/python_docs/python/tutorials/packages/gluon/image/mnist.md b/docs/python_docs/python/tutorials/packages/gluon/image/mnist.md
index 8a3d8229413b..39726a3a511c 100644
--- a/docs/python_docs/python/tutorials/packages/gluon/image/mnist.md
+++ b/docs/python_docs/python/tutorials/packages/gluon/image/mnist.md
@@ -112,8 +112,8 @@ to train the MLP network we defined above.

 For our training, we will make use of the stochastic gradient descent (SGD) optimizer. In particular, we'll be using mini-batch SGD. Standard SGD processes train data one example at a time. In practice, this is very slow and one can speed up the process by processing examples in small batches. In this case, our batch size will be 100, which is a reasonable choice. Another parameter we select here is the learning rate, which controls the step size the optimizer takes in search of a solution. We'll pick a learning rate of 0.02, again a reasonable choice. Settings such as batch size and learning rate are what are usually referred to as hyper-parameters. What values we give them can have a great impact on training performance.

-We will use [Trainer](https://mxnet.io/api/python/docs/api/gluon/mxnet.gluon.Trainer.html) class to apply the
-[SGD optimizer](https://mxnet.io/api/python/docs/api/gluon-related/_autogen/mxnet.optimizer.SGD.html) on the
+We will use the [Trainer](/api/python/docs/api/gluon/trainer.html) class to apply the
+[SGD optimizer](/api/python/docs/api/optimizer/index.html#mxnet.optimizer.SGD) on the
 initialized parameters.

 ```python
diff --git a/docs/python_docs/python/tutorials/packages/ndarray/sparse/csr.md b/docs/python_docs/python/tutorials/packages/ndarray/sparse/csr.md
index 0b362513c0ae..b91279cff4d4 100644
--- a/docs/python_docs/python/tutorials/packages/ndarray/sparse/csr.md
+++ b/docs/python_docs/python/tutorials/packages/ndarray/sparse/csr.md
@@ -556,7 +556,7 @@ except mx.MXNetError as err:

 ## Next

-[Train a Linear Regression Model with Sparse Symbols](http://mxnet.apache.org/tutorials/sparse/train.html)
+[Train a Linear Regression Model with Sparse Symbols](/api/python/docs/tutorials/packages/ndarray/sparse/train.html)
diff --git a/docs/python_docs/python/tutorials/packages/ndarray/sparse/row_sparse.md b/docs/python_docs/python/tutorials/packages/ndarray/sparse/row_sparse.md
index 1241182af85b..7500e82cf9e6 100644
--- a/docs/python_docs/python/tutorials/packages/ndarray/sparse/row_sparse.md
+++ b/docs/python_docs/python/tutorials/packages/ndarray/sparse/row_sparse.md
@@ -578,7 +578,7 @@ except mx.MXNetError as err:

 ## Next

-[Train a Linear Regression Model with Sparse Symbols](http://mxnet.apache.org/tutorials/sparse/train.html)
+[Train a Linear Regression Model with Sparse Symbols](/api/python/docs/tutorials/packages/ndarray/sparse/train.html)
diff --git a/docs/python_docs/python/tutorials/packages/ndarray/sparse/train.md b/docs/python_docs/python/tutorials/packages/ndarray/sparse/train.md
index 71669e142a4b..336185cf7583 100644
--- a/docs/python_docs/python/tutorials/packages/ndarray/sparse/train.md
+++ b/docs/python_docs/python/tutorials/packages/ndarray/sparse/train.md
@@ -27,18 +27,18 @@ then train a linear regression model using sparse symbols with the Module API.

 To complete this tutorial, we need:

-- MXNet. See the instructions for your operating system in [Setup and Installation](https://mxnet.io/get_started).
+- MXNet. See the instructions for your operating system in [Setup and Installation](/get_started).

-- [Jupyter Notebook](https://jupyter.org/index.html) and [Python Requests](http://docs.python-requests.org/en/master/) packages.
+- [Jupyter Notebook](https://jupyter.org/index.html) and [Python Requests](https://3.python-requests.org/) packages.
 ```
 pip install jupyter requests
 ```

 - Basic knowledge of Symbol in MXNet. See the detailed tutorial for Symbol in [Symbol - Neural Network Graphs and Auto-differentiation](https://mxnet.apache.org/tutorials/basic/symbol.html).

-- Basic knowledge of CSRNDArray in MXNet. See the detailed tutorial for CSRNDArray in [CSRNDArray - NDArray in Compressed Sparse Row Storage Format](https://mxnet.apache.org/versions/master/tutorials/sparse/csr.html).
+- Basic knowledge of CSRNDArray in MXNet. See the detailed tutorial for CSRNDArray in [CSRNDArray - NDArray in Compressed Sparse Row Storage Format](/api/python/docs/tutorials/packages/ndarray/sparse/csr.html).

-- Basic knowledge of RowSparseNDArray in MXNet. See the detailed tutorial for RowSparseNDArray in [RowSparseNDArray - NDArray for Sparse Gradient Updates](https://mxnet.apache.org/versions/master/tutorials/sparse/row_sparse.html).
+- Basic knowledge of RowSparseNDArray in MXNet. See the detailed tutorial for RowSparseNDArray in [RowSparseNDArray - NDArray for Sparse Gradient Updates](/api/python/docs/tutorials/packages/ndarray/sparse/row_sparse.html).

 ## Variables
@@ -155,7 +155,7 @@ f = mx.sym.sparse.elemwise_add(c, c)
 ### Storage Type Inference

 What will be the output storage types of sparse symbols? In MXNet, for any sparse symbol, the result storage types are inferred based on storage types of inputs.
-You can read the [Sparse Symbol API](https://mxnet.apache.org/versions/master/api/python/symbol/sparse.html) documentation to find what output storage types are. In the example below we will try out the storage types introduced in the Row Sparse and Compressed Sparse Row tutorials: `default` (dense), `csr`, and `row_sparse`.
+You can read the [Sparse Symbol API](/api/python/docs/api/symbol/sparse/index.html) documentation to find what output storage types are. In the example below we will try out the storage types introduced in the Row Sparse and Compressed Sparse Row tutorials: `default` (dense), `csr`, and `row_sparse`.

 ```python
diff --git a/docs/static_site/src/_includes/get_started/devices/nvidia-jetson.md b/docs/static_site/src/_includes/get_started/devices/nvidia-jetson.md
index fe515f3392d7..40fb1d2e82f5 100644
--- a/docs/static_site/src/_includes/get_started/devices/nvidia-jetson.md
+++ b/docs/static_site/src/_includes/get_started/devices/nvidia-jetson.md
@@ -1,4 +1,4 @@
 # NVIDIA Jetson Devices

 To install MXNet on a Jetson TX or Nano, please refer to the [Jetson installation
-guide](get_started/jetson_setup).
\ No newline at end of file
+guide](/get_started/jetson_setup).
\ No newline at end of file
diff --git a/docs/static_site/src/_includes/get_started/get_started.html b/docs/static_site/src/_includes/get_started/get_started.html
index 4905d28ce2d3..77367c7ed337 100644
--- a/docs/static_site/src/_includes/get_started/get_started.html
+++ b/docs/static_site/src/_includes/get_started/get_started.html
@@ -256,8 +256,8 @@
 Installing MXNet

-For more installation options, refer to the Ubuntu installation guide and
-CentOS installation guide.
+For more installation options, refer to the Ubuntu installation guide and
+CentOS installation guide.
@@ -354,7 +354,7 @@
 Installing MXNet

-For more installation options, refer to the MXNet macOS installation guide.
+For more installation options, refer to the MXNet macOS installation guide.
@@ -440,7 +440,7 @@
 Installing MXNet

-For more installation options, refer to the MXNet Windows installation guide.
+For more installation options, refer to the MXNet Windows installation guide.
diff --git a/docs/static_site/src/_includes/get_started/linux/java/cpu.md b/docs/static_site/src/_includes/get_started/linux/java/cpu.md
index 5345a2d754b2..fc6f598fa5ee 100644
--- a/docs/static_site/src/_includes/get_started/linux/java/cpu.md
+++ b/docs/static_site/src/_includes/get_started/linux/java/cpu.md
@@ -1,6 +1,6 @@
 You can use the Maven packages defined in the following dependency to include MXNet in
 your Java project. The Java API is provided as a subset of the Scala API and is intended for inference only.
-Please refer to the MXNet-Java setup guide for a detailed set of
+Please refer to the MXNet-Java setup guide for a detailed set of
 instructions to help you with the setup process.
diff --git a/docs/static_site/src/_includes/get_started/linux/java/gpu.md b/docs/static_site/src/_includes/get_started/linux/java/gpu.md
index 5e687a353fe4..6f6757f6e2ea 100644
--- a/docs/static_site/src/_includes/get_started/linux/java/gpu.md
+++ b/docs/static_site/src/_includes/get_started/linux/java/gpu.md
@@ -1,6 +1,6 @@
 You can use the Maven packages defined in the following dependency to include MXNet in
 your Java project. The Java API is provided as a subset of the Scala API and is intended for inference only.
-Please refer to the MXNet-Java setup guide for a detailed set of
+Please refer to the MXNet-Java setup guide for a detailed set of
 instructions to help you with the setup process.
diff --git a/docs/static_site/src/_includes/get_started/linux/julia/build-from-source.md b/docs/static_site/src/_includes/get_started/linux/julia/build-from-source.md
index fbbc0bd248a9..018aca9d7387 100644
--- a/docs/static_site/src/_includes/get_started/linux/julia/build-from-source.md
+++ b/docs/static_site/src/_includes/get_started/linux/julia/build-from-source.md
@@ -1,2 +1,2 @@
-Refer to the [Julia section of the MXNet Ubuntu installation guide](get_started/ubuntu_setup#install-the-mxnet-package-for-julia).
+Refer to the [Julia section of the MXNet Ubuntu installation guide](/get_started/ubuntu_setup#install-the-mxnet-package-for-julia).
diff --git a/docs/static_site/src/_includes/get_started/linux/r/cpu.md b/docs/static_site/src/_includes/get_started/linux/r/cpu.md
index c0a4e015b61d..88ca5dd39933 100644
--- a/docs/static_site/src/_includes/get_started/linux/r/cpu.md
+++ b/docs/static_site/src/_includes/get_started/linux/r/cpu.md
@@ -1,5 +1,5 @@
 The default version of R that is installed with `apt-get` is insufficient. You will need
-to first [install R v3.4.4+ and build MXNet from source](get_started/ubuntu_setup.html#install-the-mxnet-package-for-r).
+to first [install R v3.4.4+ and build MXNet from source](/get_started/ubuntu_setup.html#install-the-mxnet-package-for-r).

 After you have setup R v3.4.4+ and MXNet, you can build and install the MXNet R bindings
 with the following, assuming that `incubator-mxnet` is the source directory you used to build MXNet as follows:
diff --git a/docs/static_site/src/_includes/get_started/linux/r/gpu.md b/docs/static_site/src/_includes/get_started/linux/r/gpu.md
index 57afe7a8d65e..16fbfd09d4d4 100644
--- a/docs/static_site/src/_includes/get_started/linux/r/gpu.md
+++ b/docs/static_site/src/_includes/get_started/linux/r/gpu.md
@@ -1,7 +1,7 @@
 The default version of R that is installed with `apt-get` is insufficient. You will need
 to first [install R v3.4.4+ and build MXNet from
-source](get_started/ubuntu_setup.html#install-the-mxnet-package-for-r).
+source](/get_started/ubuntu_setup.html#install-the-mxnet-package-for-r).

 After you have setup R v3.4.4+ and MXNet, you can build and install the MXNet R bindings
 with the
diff --git a/docs/static_site/src/_includes/get_started/macos/java/cpu.md b/docs/static_site/src/_includes/get_started/macos/java/cpu.md
index 2050149fd33d..002037a15771 100644
--- a/docs/static_site/src/_includes/get_started/macos/java/cpu.md
+++ b/docs/static_site/src/_includes/get_started/macos/java/cpu.md
@@ -1,7 +1,7 @@
 You can use the Maven packages defined in the following dependency to include MXNet in
 your Java project. The Java API is provided as a subset of the Scala API and is intended for inference only.
-Please refer to the [MXNet-Java setup guide](get_started/java_setup.html) for a detailed set of instructions to help you with the setup process.
+Please refer to the [MXNet-Java setup guide](/get_started/java_setup.html) for a detailed set of instructions to help you with the setup process.

 PyPI for
-other MXNet pip packages, or validate your MXNet installation.
+other MXNet pip packages, or validate your MXNet installation.

 Download from source

-The signed source code for Apache MXNet (incubating) is available for download here
+The signed source code for Apache MXNet (incubating) is available for download here
diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h
index 177ec5d40146..ac0c6726f2c7 100644
--- a/include/mxnet/c_api.h
+++ b/include/mxnet/c_api.h
@@ -1170,7 +1170,7 @@ MXNET_DLL int MXAutogradIsTraining(bool* curr);
  * \param curr returns the current status
  * \return 0 when success, -1 when failure happens
  */
-MXNET_DLL int MXIsNumpyShape(bool* curr);
+MXNET_DLL int MXIsNumpyShape(int* curr);
 /*!
  * \brief set numpy compatibility switch
  * \param is_np_shape 1 when numpy shape semantics is thread local on,
diff --git a/include/mxnet/imperative.h b/include/mxnet/imperative.h
index 18f6424e54f7..dbd81e575872 100644
--- a/include/mxnet/imperative.h
+++ b/include/mxnet/imperative.h
@@ -108,12 +108,14 @@ class Imperative {
       is_recording_ = is_recording;
       return old;
     }
-    /*! \brief whether numpy compatibility is on. */
-    bool is_np_shape() const {
+    /*! \brief return current numpy compatibility status,
+     *  GlobalOn(2), ThreadLocalOn(1), Off(0).
+     * */
+    int is_np_shape() const {
       if (is_np_shape_global_) {
-        return true;
+        return 2;
       }
-      return is_np_shape_thread_local_;
+      return is_np_shape_thread_local_ ? 1 : 0;
    }
    /*! \brief specify numpy compatibility off, thread local on or global on. */
    bool set_is_np_shape(int is_np_shape) {
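The two headers above turn `MXIsNumpyShape` from a boolean into a tri-state integer. A sketch of reading that state through the Python ctypes binding — `mxnet.base._LIB`, `check_call`, and `mxnet.util.set_np_shape` all appear elsewhere in this patch, and the returned values follow the new `imperative.h` comment:

```python
import ctypes
from mxnet.base import _LIB, check_call
from mxnet.util import set_np_shape

set_np_shape(True)                            # thread-local NumPy-shape semantics
curr = ctypes.c_int()                         # was c_bool before this change
check_call(_LIB.MXIsNumpyShape(ctypes.byref(curr)))
print(curr.value)                             # 1; 0 = off, 2 = globally on
```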
+ "ndarray/statistic.jl" + "ndarray/trig.jl" + "ndarray/type.jl" +""" +listpages(x) = + ["$x.jl"; joinpath.(x, readdir(joinpath(@__DIR__, "..", "src", x)))] + +const api_pages = [ + "api/context.md", + "api/ndarray.md", + "api/symbolic-node.md", + "api/model.md", + "api/initializers.md", + "api/optimizers.md", + "api/callbacks.md", + "api/metric.md", + "api/io.md", + "api/nn-factory.md", + "api/executor.md", + "api/kvstore.md", + "api/visualize.md", +] + makedocs( sitename = "MXNet.jl", modules = MXNet, diff --git a/julia/docs/mkdocs.yml b/julia/docs/mkdocs.yml index 22cb71869673..383505621540 100644 --- a/julia/docs/mkdocs.yml +++ b/julia/docs/mkdocs.yml @@ -62,4 +62,5 @@ nav: - Symbolic API: api/symbolic-node.md - Neural Networks Factory: api/nn-factory.md - Executor: api/executor.md + - Key-Value Store: api/kvstore.md - Network Visualization: api/visualize.md diff --git a/julia/docs/src/api.md b/julia/docs/src/api.md index 60cb0831d1bf..04cfadd6d698 100644 --- a/julia/docs/src/api.md +++ b/julia/docs/src/api.md @@ -18,18 +18,5 @@ # API Documentation ```@contents -Pages = [ - "api/symbolic-node.md", - "api/ndarray.md", - "api/context.md", - "api/model.md", - "api/initializers.md", - "api/optimizers.md", - "api/callbacks.md", - "api/metric.md", - "api/io.md", - "api/nn-factory.md", - "api/executor.md", - "api/visualize.md", -] +Pages = api_pages ``` diff --git a/julia/docs/src/api/ndarray.md b/julia/docs/src/api/ndarray.md index 64f59dc5393e..640e8b3ec372 100644 --- a/julia/docs/src/api/ndarray.md +++ b/julia/docs/src/api/ndarray.md @@ -19,7 +19,7 @@ ## Arithmetic Operations -In the following example `y` can be a `Real` value or another `NDArray` +In the following example `y` can be a `Real` value or another `NDArray`. | API | Example | | |-----|----------|----------------------------| @@ -70,21 +70,5 @@ In the following example `y` can be a `Real` value or another `NDArray` ```@autodocs Modules = [MXNet.mx] -Pages = [ - "ndarray.jl", - "ndarray/activation.jl", - "ndarray/arithmetic.jl", - "ndarray/array.jl", - "ndarray/autoimport.jl", - "ndarray/comparison.jl", - "ndarray/context.jl", - "ndarray/io.jl", - "ndarray/linalg.jl", - "ndarray/reduction.jl", - "ndarray/remap.jl", - "ndarray/show.jl", - "ndarray/statistic.jl", - "ndarray/trig.jl", - "ndarray/type.jl", -] +Pages = listpages("ndarray") ``` diff --git a/julia/docs/src/api/symbolic-node.md b/julia/docs/src/api/symbolic-node.md index 0efe4605c414..785dda87fbde 100644 --- a/julia/docs/src/api/symbolic-node.md +++ b/julia/docs/src/api/symbolic-node.md @@ -19,14 +19,5 @@ ```@autodocs Modules = [MXNet.mx] -Pages = [ - "symbolic-node.jl", - "symbolic-node/arithmetic.jl", - "symbolic-node/array.jl", - "symbolic-node/autodiff.jl", - "symbolic-node/io.jl", - "symbolic-node/op.jl", - "symbolic-node/show.jl", - "symbolic-node/type.jl", -] +Pages = listpages("symbolic-node") ``` diff --git a/julia/docs/src/index.md b/julia/docs/src/index.md index aacd844cc38e..4213265b4bd4 100644 --- a/julia/docs/src/index.md +++ b/julia/docs/src/index.md @@ -55,18 +55,6 @@ Depth = 2 ## API Documentation ```@contents -Pages = [ - "api/context.md", - "api/ndarray.md", - "api/symbolic-node.md", - "api/model.md", - "api/initializers.md", - "api/optimizers.md", - "api/callbacks.md", - "api/metric.md", - "api/io.md", - "api/nn-factory.md", - "api/executor.md", - "api/visualize.md", -] +Pages = api_pages +Depth = 2 ``` diff --git a/julia/docs/src/tutorial/char-lstm.md b/julia/docs/src/tutorial/char-lstm.md index bc7f7b471d94..ab7e9352b5ab 100644 --- 
a/julia/docs/src/tutorial/char-lstm.md +++ b/julia/docs/src/tutorial/char-lstm.md @@ -31,7 +31,7 @@ networks yet, the example shown here is an implementation of LSTM by using the default FeedForward model via explicitly unfolding over time. We will be using fixed-length input sequence for training. The code is adapted from the [char-rnn example for MXNet's Python -binding](https://github.com/dmlc/mxnet/blob/master/example/rnn/char_lstm.ipynb), +binding](https://github.com/dmlc/mxnet-notebooks/blob/master/python/tutorials/char_lstm.ipynb), which demonstrates how to use low-level [Symbolic API](@ref) to build customized neural network models directly. @@ -165,7 +165,7 @@ char-lstm. To train the model, we just follow the standard high-level API. Firstly, we construct a LSTM symbolic architecture: Note all the parameters are defined in -[examples/char-lstm/config.jl](https://github.com/dmlc/MXNet.jl/blob/master/examples/char-lstm/config.jl). +[examples/char-lstm/config.jl](https://github.com/apache/incubator-mxnet/blob/master/julia/examples/char-lstm/config.jl). Now we load the text file and define the data provider. The data `input.txt` we used in this example is [a tiny Shakespeare dataset](https://github.com/dmlc/web-data/tree/master/mxnet/tinyshakespeare). @@ -318,6 +318,6 @@ illustrations](http://colah.github.io/posts/2015-08-Understanding-LSTMs/), but could otherwise be very useful for debugging. As we can see, the LSTM unfolded over time is just a (very) deep neural network. The complete code for producing this visualization can be found in -[examples/char-lstm/visualize.jl](https://github.com/apache/incubator-mxnet/tree/master/julia/examples/char-lstmvisualize.jl). +[examples/char-lstm/visualize.jl](https://github.com/apache/incubator-mxnet/blob/master/julia/examples/char-lstm/visualize.jl). ![image](images/char-lstm-vis.svg) diff --git a/julia/docs/src/tutorial/mnist.md b/julia/docs/src/tutorial/mnist.md index cc5267071f11..a404f75efe12 100644 --- a/julia/docs/src/tutorial/mnist.md +++ b/julia/docs/src/tutorial/mnist.md @@ -23,7 +23,7 @@ multi-layer perceptron and then a convolutional neural network (the LeNet architecture) on the [MNIST handwritten digit dataset](http://yann.lecun.com/exdb/mnist/). The code for this tutorial could be found in -[examples/mnist](https://github.com/dmlc/MXNet.jl/tree/master/examples/mnist). There are also two Jupyter notebooks that expand a little more on the [MLP](https://github.com/ultradian/julia_notebooks/blob/master/mxnet/mnistMLP.ipynb) and the [LeNet](https://github.com/ultradian/julia_notebooks/blob/master/mxnet/mnistLenet.ipynb), using the more general `ArrayDataProvider`. +[examples/mnist](/api/julia/docs/api/tutorial/mnist/). There are also two Jupyter notebooks that expand a little more on the [MLP](https://github.com/ultradian/julia_notebooks/blob/master/mxnet/mnistMLP.ipynb) and the [LeNet](https://github.com/ultradian/julia_notebooks/blob/master/mxnet/mnistLenet.ipynb), using the more general `ArrayDataProvider`. Simple 3-layer MLP ------------------ diff --git a/julia/docs/src/user-guide/overview.md b/julia/docs/src/user-guide/overview.md index 974cc7dee974..342448a15bed 100644 --- a/julia/docs/src/user-guide/overview.md +++ b/julia/docs/src/user-guide/overview.md @@ -269,8 +269,6 @@ symbolic composition system. It is like [Theano](http://deeplearning.net/software/theano/), except that we avoided long expression compilation time by providing *larger* neural network related building blocks to guarantee computation performance. 
-See also [this note](https://mxnet.readthedocs.org/en/latest/program_model.html) -for the design and trade-off of the MXNet symbolic composition system. The basic type is `mx.SymbolicNode`. The following is a trivial example of composing two symbols with the `+` operation. diff --git a/julia/examples/char-lstm/README.md b/julia/examples/char-lstm/README.md index ac745dd4cc41..155f29603623 100644 --- a/julia/examples/char-lstm/README.md +++ b/julia/examples/char-lstm/README.md @@ -29,7 +29,7 @@ and `StatsBase.jl`. ## Training This example is adapted from the -[example in Python binding](https://github.com/dmlc/mxnet/blob/master/example/rnn/char_lstm.ipynb) of +[example in Python binding](https://github.com/dmlc/mxnet-notebooks/blob/master/python/tutorials/char_lstm.ipynb) of MXNet. The data `input.txt` can be downloaded [here](https://github.com/dmlc/web-data/tree/master/mxnet/tinyshakespeare). Modify parameters in [config.jl](config.jl) and then run [train.jl](train.jl). An example output diff --git a/julia/src/executor.jl b/julia/src/executor.jl index 37f2dde615b8..7f6c2bb5aa58 100644 --- a/julia/src/executor.jl +++ b/julia/src/executor.jl @@ -245,7 +245,7 @@ Total 11 TempSpace resource requested ``` """ Base.print(io::IO, x::Executor) = print(io, debug_str(x)) -Base.print(x::Executor) = print(STDOUT, x) +Base.print(x::Executor) = print(stdout, x) function debug_str(x::Executor) s_ref = Ref{Cstring}(C_NULL) diff --git a/julia/src/symbolic-node/show.jl b/julia/src/symbolic-node/show.jl index f07c6b4655ee..9d40ea124505 100644 --- a/julia/src/symbolic-node/show.jl +++ b/julia/src/symbolic-node/show.jl @@ -57,6 +57,6 @@ function Base.print(io::IO, sym::SymbolicNode) print(io, unsafe_string(out[])) end -Base.print(sym::SymbolicNode) = print(STDOUT, sym) +Base.print(sym::SymbolicNode) = print(stdout, sym) diff --git a/perl-package/AI-MXNet/t/test_autograd.t b/perl-package/AI-MXNet/t/test_autograd.t index 931c6d59333b..2ddad60df989 100644 --- a/perl-package/AI-MXNet/t/test_autograd.t +++ b/perl-package/AI-MXNet/t/test_autograd.t @@ -23,6 +23,7 @@ use AI::MXNet::TestUtils qw(same almost_equal rand_ndarray); use AI::MXNet::Base qw(:DEFAULT pones); use Test::More tests => 246; $ENV{MXNET_STORAGE_FALLBACK_LOG_VERBOSE} = 0; +$ENV{MXNET_SUBGRAPH_VERBOSE} = 0; sub autograd_assert { diff --git a/perl-package/AI-MXNet/t/test_gluon_trainer.t b/perl-package/AI-MXNet/t/test_gluon_trainer.t index 81113af28c20..3b1130af4ecf 100644 --- a/perl-package/AI-MXNet/t/test_gluon_trainer.t +++ b/perl-package/AI-MXNet/t/test_gluon_trainer.t @@ -25,6 +25,7 @@ use AI::MXNet::TestUtils qw(almost_equal dies_ok); use Scalar::Util qw(refaddr); use AI::MXNet::Base; $ENV{MXNET_STORAGE_FALLBACK_LOG_VERBOSE} = 0; +$ENV{MXNET_SUBGRAPH_VERBOSE} = 0; sub test_multi_trainer { @@ -252,4 +253,3 @@ sub test_trainer_reset_kv } test_trainer_reset_kv(); - diff --git a/perl-package/AI-MXNet/t/test_module.t b/perl-package/AI-MXNet/t/test_module.t index 3bbd8fdc4ea4..55e098683399 100644 --- a/perl-package/AI-MXNet/t/test_module.t +++ b/perl-package/AI-MXNet/t/test_module.t @@ -22,6 +22,7 @@ use AI::MXNet qw(mx); use AI::MXNet::Base; use AI::MXNet::TestUtils qw(almost_equal enumerate same_array dies_like rand_ndarray); $ENV{MXNET_STORAGE_FALLBACK_LOG_VERBOSE} = 0; +$ENV{MXNET_SUBGRAPH_VERBOSE} = 0; sub test_module_layout { diff --git a/perl-package/AI-MXNet/t/test_sparse_ndarray.t b/perl-package/AI-MXNet/t/test_sparse_ndarray.t index f143346b4890..afb0b25aa816 100644 --- a/perl-package/AI-MXNet/t/test_sparse_ndarray.t +++ 
b/perl-package/AI-MXNet/t/test_sparse_ndarray.t @@ -24,6 +24,7 @@ use AI::MXNet::TestUtils qw(zip assert enumerate same rand_shape_2d rand_shape_3 rand_sparse_ndarray random_arrays almost_equal rand_ndarray randint allclose dies_ok); use AI::MXNet::Base qw(pones pzeros pdl product rand_sparse); $ENV{MXNET_STORAGE_FALLBACK_LOG_VERBOSE} = 0; +$ENV{MXNET_SUBGRAPH_VERBOSE} = 0; sub sparse_nd_ones diff --git a/python/mxnet/kvstore.py b/python/mxnet/kvstore.py index 5d332ff45ecb..61c64ec0984f 100644 --- a/python/mxnet/kvstore.py +++ b/python/mxnet/kvstore.py @@ -31,8 +31,7 @@ from .profiler import set_kvstore_handle def _ctype_key_value(keys, vals): - """ - Returns ctype arrays for the key-value args, and the whether string keys are used. + """Returns ctype arrays for the key-value args, and the whether string keys are used. For internal use only. """ if isinstance(keys, (tuple, list)): @@ -66,9 +65,7 @@ def _ctype_key_value(keys, vals): return (c_keys, c_handle_array(vals), use_str_keys) def _ctype_dict(param_dict): - """ - Returns ctype arrays for keys and values(converted to strings) in a dictionary - """ + """Returns ctype arrays for keys and values(converted to strings) in a dictionary""" assert(isinstance(param_dict, dict)), \ "unexpected type for param_dict: " + str(type(param_dict)) c_keys = c_array(ctypes.c_char_p, [c_str(k) for k in param_dict.keys()]) diff --git a/python/mxnet/metric.py b/python/mxnet/metric.py index 07ec2ef4d61d..6e2d66cb9d15 100644 --- a/python/mxnet/metric.py +++ b/python/mxnet/metric.py @@ -153,8 +153,7 @@ def reset(self): self.global_sum_metric = 0.0 def reset_local(self): - """Resets the local portion of the internal evaluation results - to initial state.""" + """Resets the local portion of the internal evaluation results to initial state.""" self.num_inst = 0 self.sum_metric = 0.0 @@ -372,8 +371,7 @@ def reset(self): pass def reset_local(self): - """Resets the local portion of the internal evaluation results - to initial state.""" + """Resets the local portion of the internal evaluation results to initial state.""" try: for metric in self.metrics: metric.reset_local() @@ -592,8 +590,7 @@ def update(self, labels, preds): class _BinaryClassificationMetrics(object): - """ - Private container class for classification metric statistics. True/false positive and + """Private container class for classification metric statistics. True/false positive and true/false negative counts are sufficient statistics for various classification metrics. This class provides the machinery to track those statistics across mini-batches of (label, prediction) pairs. @@ -610,9 +607,7 @@ def __init__(self): self.global_true_negatives = 0 def update_binary_stats(self, label, pred): - """ - Update various binary classification counts for a single (label, pred) - pair. + """Update various binary classification counts for a single (label, pred) pair. Parameters ---------- @@ -691,9 +686,7 @@ def global_fscore(self): return 0. def matthewscc(self, use_global=False): - """ - Calculate the Matthew's Correlation Coefficent - """ + """Calculate the Matthew's Correlation Coefficent""" if use_global: if not self.global_total_examples: return 0. @@ -1604,8 +1597,7 @@ def reset(self): self.reset_local() def reset_local(self): - """Resets the local portion of the internal evaluation results - to initial state.""" + """Resets the local portion of the internal evaluation results to initial state.""" self.num_inst = 0. 
self.lcm = numpy.zeros((self.k, self.k)) diff --git a/python/mxnet/profiler.py b/python/mxnet/profiler.py index 7dbc060ed60f..8e8ac87c9e06 100644 --- a/python/mxnet/profiler.py +++ b/python/mxnet/profiler.py @@ -207,8 +207,7 @@ def pause(profile_process='worker'): def resume(profile_process='worker'): - """ - Resume paused profiling. + """Resume paused profiling. Parameters ---------- diff --git a/python/mxnet/rtc.py b/python/mxnet/rtc.py index 4dea0e656b7e..5dfc5ea6dfe2 100644 --- a/python/mxnet/rtc.py +++ b/python/mxnet/rtc.py @@ -172,7 +172,8 @@ def get_kernel(self, name, signature): class CudaKernel(object): """Constructs CUDA kernel. Should be created by `CudaModule.get_kernel`, - not intended to be used by users.""" + not intended to be used by users. + """ def __init__(self, handle, name, is_ndarray, dtypes): self.handle = handle self._name = name diff --git a/python/mxnet/runtime.py b/python/mxnet/runtime.py index 0f7de76937c0..f2e98fe674fa 100644 --- a/python/mxnet/runtime.py +++ b/python/mxnet/runtime.py @@ -26,9 +26,7 @@ from .base import _LIB, check_call class Feature(ctypes.Structure): - """ - Compile time feature description, member fields: `name` and `enabled`. - """ + """Compile time feature description, member fields: `name` and `enabled`.""" _fields_ = [ ("_name", ctypes.c_char_p), ("_enabled", ctypes.c_bool) @@ -36,16 +34,12 @@ class Feature(ctypes.Structure): @property def name(self): - """ - Feature name. - """ + """Feature name.""" return self._name.decode() @property def enabled(self): - """ - True if MXNet was compiled with the given compile-time feature. - """ + """True if MXNet was compiled with the given compile-time feature.""" return self._enabled def __repr__(self): @@ -55,8 +49,7 @@ def __repr__(self): return "✖ {}".format(self.name) def feature_list(): - """ - Check the library for compile-time features. The list of features are maintained in libinfo.h and libinfo.cc + """Check the library for compile-time features. The list of features are maintained in libinfo.h and libinfo.cc Returns ------- @@ -70,9 +63,7 @@ def feature_list(): return features class Features(collections.OrderedDict): - """ - OrderedDict of name to Feature - """ + """OrderedDict of name to Feature""" instance = None def __new__(cls): if cls.instance is None: @@ -84,8 +75,7 @@ def __repr__(self): return str(list(self.values())) def is_enabled(self, feature_name): - """ - Check for a particular feature by name + """Check for a particular feature by name Parameters ---------- diff --git a/python/mxnet/test_utils.py b/python/mxnet/test_utils.py index 4862aee8570d..6c8fefca4490 100644 --- a/python/mxnet/test_utils.py +++ b/python/mxnet/test_utils.py @@ -1935,8 +1935,7 @@ def same_array(array1, array2): @contextmanager def discard_stderr(): - """ - Discards error output of a routine if invoked as: + """Discards error output of a routine if invoked as: with discard_stderr(): ... @@ -2324,7 +2323,8 @@ def __exit__(self, ptype, value, trace): def collapse_sum_like(a, shape): """Given `a` as a numpy ndarray, perform reduce_sum on `a` over the axes that do not - exist in `shape`. Note that an ndarray with `shape` must be broadcastable to `a`.""" + exist in `shape`. Note that an ndarray with `shape` must be broadcastable to `a`. + """ assert len(a.shape) >= len(shape) if np.prod(shape) == 0 or a.size == 0: return np.zeros(shape, dtype=a.dtype) @@ -2349,7 +2349,8 @@ def is_cd_run(): def has_tvm_ops(): """Returns True if MXNet is compiled with TVM generated operators. 
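`collapse_sum_like`, whose docstring is reflowed above, sums an array over the axes along which it would have been broadcast from `shape`. A minimal NumPy sketch of those semantics, omitting the zero-size fast path the real helper includes:

```python
import numpy as np

def collapse_sum_like(a, shape):
    """Reduce-sum `a` over the axes that do not exist in `shape`.

    `shape` must be broadcastable to `a.shape`; a sketch of the semantics only.
    """
    assert len(a.shape) >= len(shape)
    # Sum away the leading axes that `shape` does not have at all.
    extra = len(a.shape) - len(shape)
    out = a.sum(axis=tuple(range(extra)))
    # Then sum (keeping dims) over axes where `shape` has size 1.
    for ax, s in enumerate(shape):
        if s == 1 and out.shape[ax] != 1:
            out = out.sum(axis=ax, keepdims=True)
    return out.reshape(shape)

grad = np.ones((2, 3, 4))
print(collapse_sum_like(grad, (3, 1)).shape)  # (3, 1)
```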
If current ctx - is GPU, it only returns True for CUDA compute capability > 52 where FP16 is supported.""" + is GPU, it only returns True for CUDA compute capability > 52 where FP16 is supported. + """ built_with_tvm_op = _features.is_enabled("TVM_OP") ctx = current_context() if ctx.device_type == 'gpu': @@ -2367,7 +2368,8 @@ def has_tvm_ops(): def is_op_runnable(): """Returns True for all CPU tests. Returns True for GPU tests that are either of the following. 1. Built with USE_TVM_OP=0. - 2. Built with USE_TVM_OP=1, but with compute capability >= 53.""" + 2. Built with USE_TVM_OP=1, but with compute capability >= 53. + """ ctx = current_context() if ctx.device_type == 'gpu': if not _features.is_enabled("TVM_OP"): diff --git a/python/mxnet/util.py b/python/mxnet/util.py index cef034fd0caa..9e15caae9698 100644 --- a/python/mxnet/util.py +++ b/python/mxnet/util.py @@ -60,8 +60,7 @@ def get_gpu_memory(gpu_dev_id): def set_np_shape(active): - """ - Turns on/off NumPy shape semantics, in which `()` represents the shape of scalar tensors, + """Turns on/off NumPy shape semantics, in which `()` represents the shape of scalar tensors, and tuples with `0` elements, for example, `(0,)`, `(1, 0, 2)`, represent the shapes of zero-size tensors. This is turned off by default for keeping backward compatibility. @@ -568,8 +567,7 @@ def hybrid_forward(self, F, x, w): def np_ufunc_legal_option(key, value): - """ - Checking if ufunc arguments are legal inputs + """Checking if ufunc arguments are legal inputs Parameters ---------- diff --git a/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.cc b/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.cc index 5c704c9646a2..26eea3dd062b 100644 --- a/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.cc +++ b/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.cc @@ -2777,9 +2777,9 @@ JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxDumpProfile // Numpy JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxIsNumpyShape (JNIEnv *env, jobject obj, jobject compatibleRef) { - bool isNumpyShape; + int isNumpyShape; int ret = MXIsNumpyShape(&isNumpyShape); - SetIntField(env, compatibleRef, static_cast(isNumpyShape)); + SetIntField(env, compatibleRef, isNumpyShape); return ret; } diff --git a/src/c_api/c_api_ndarray.cc b/src/c_api/c_api_ndarray.cc index b80e17c18071..de208c0fed99 100644 --- a/src/c_api/c_api_ndarray.cc +++ b/src/c_api/c_api_ndarray.cc @@ -276,7 +276,7 @@ int MXAutogradSetIsRecording(int is_recording, int* prev) { API_END(); } -int MXIsNumpyShape(bool* curr) { +int MXIsNumpyShape(int* curr) { API_BEGIN(); *curr = Imperative::Get()->is_np_shape(); API_END(); diff --git a/src/executor/graph_executor.cc b/src/executor/graph_executor.cc index d92253266f35..882105da1321 100644 --- a/src/executor/graph_executor.cc +++ b/src/executor/graph_executor.cc @@ -1627,16 +1627,16 @@ static nnvm::Graph InferForwardAttrs(nnvm::Graph g, static bool SubgraphBackendCheck(const op::SubgraphBackendPtr& backend, const Context& default_ctx, - bool verbose = false) { + int verbose = 1) { if (backend->HasAttr("enable") && (backend->GetAttr("enable") != true)) { - if (verbose) { + if (verbose > 1) { LOG(INFO) << "Subgraph backend " << backend->GetName() << " isn't activated."; } return false; } if (backend->HasAttr("context") && backend->GetAttr("context") != default_ctx) { - if (verbose) { + if (verbose > 1) { LOG(INFO) << "Subgraph backend " << backend->GetName() << " isn't activated as context mismatch."; } @@ 
-1647,7 +1647,7 @@ static bool SubgraphBackendCheck(const op::SubgraphBackendPtr& backend, static bool SubgraphPropertyCheck(const std::string& backend_name, const op::SubgraphPropertyPtr& prop, bool need_grad, - bool verbose = false) { + int verbose = 1) { auto full_name = prop->HasAttr("property_name") ? prop->GetAttr("property_name") : std::string(); if (prop->HasAttr("disable") && prop->GetAttr("disable") == true) { @@ -1657,7 +1657,7 @@ static bool SubgraphPropertyCheck(const std::string& backend_name, } if (prop->HasAttr("inference_only") && prop->GetAttr("inference_only") == true) { if (need_grad) { - if (verbose) { + if (verbose > 1) { LOG(INFO) << "skip partitioning graph with subgraph property " << full_name << " from backend " << backend_name << " as it requires `grad_req=null`."; } @@ -1699,7 +1699,7 @@ static nnvm::Symbol BuildSubgraph( const std::unordered_map& arg_stype_map, const Context& default_ctx, const std::map& ctx_map, std::vector* in_arg_ctxes, std::vector* arg_grad_ctxes, std::vector* grad_req_types, - std::vector* aux_state_ctxes, bool verbose = false) { + std::vector* aux_state_ctxes, int verbose = 1) { // setup map for in_arg_ctxes, arg_grad_ctxes, aux_state_ctxes and grad_req_types std::unordered_map in_arg_ctx_map; std::unordered_map arg_grad_ctx_map; @@ -1794,7 +1794,7 @@ static nnvm::Symbol BuildSubgraph(const nnvm::Symbol& src, const op::SubgraphBac std::vector* in_args, std::vector* arg_grad_store, std::vector* grad_req_type, - std::vector* aux_states, bool verbose = false) { + std::vector* aux_states, int verbose = 1) { // setup map for in_args, arg_grad_store, grad_req_type and aux_states std::unordered_map in_args_map; std::unordered_map arg_grad_store_map; @@ -1929,11 +1929,11 @@ Executor *Executor::SimpleBind(nnvm::Symbol symbol, auto exec = new exec::GraphExecutor(); bool init = false; if (!exec->subgraph_property().empty()) { - static bool verbose = dmlc::GetEnv("MXNET_SUBGRAPH_VERBOSE", false); + static int verbose = dmlc::GetEnv("MXNET_SUBGRAPH_VERBOSE", 1); const auto& backend_name = exec->subgraph_property(); const auto& backend = op::SubgraphBackendRegistry::Get()->GetSubgraphBackend(backend_name); if (exec::SubgraphBackendCheck(backend, default_ctx, verbose)) { - LOG(INFO) << "Subgraph backend " << backend_name << " is activated."; + if (verbose) LOG(INFO) << "Subgraph backend " << backend_name << " is activated."; std::vector tmp_in_arg_ctxes = in_arg_ctxes; std::vector tmp_arg_grad_ctxes = arg_grad_ctxes; std::vector tmp_aux_state_ctxes = aux_state_ctxes; @@ -2001,7 +2001,7 @@ Executor *Executor::Bind(nnvm::Symbol symbol, const std::vector &aux_states, Executor* shared_exec) { auto exec = new exec::GraphExecutor(); - static bool verbose = dmlc::GetEnv("MXNET_SUBGRAPH_VERBOSE", false); + static int verbose = dmlc::GetEnv("MXNET_SUBGRAPH_VERBOSE", 1); std::vector tmp_in_args = in_args; std::vector tmp_arg_grad_store = arg_grad_store; std::vector tmp_grad_req_type = grad_req_type; @@ -2011,7 +2011,7 @@ Executor *Executor::Bind(nnvm::Symbol symbol, const auto& backend_name = exec->subgraph_property(); const auto& backend = op::SubgraphBackendRegistry::Get()->GetSubgraphBackend(backend_name); if (exec::SubgraphBackendCheck(backend, default_ctx, verbose)) { - LOG(INFO) << "Subgraph backend " << backend_name << " is activated."; + if (verbose) LOG(INFO) << "Subgraph backend " << backend_name << " is activated."; symbol = exec::BuildSubgraph(symbol, backend, default_ctx, group2ctx, &tmp_in_args, &tmp_arg_grad_store, &tmp_grad_req_type, 
&tmp_aux_states, verbose); diff --git a/src/imperative/cached_op.cc b/src/imperative/cached_op.cc index 6818d757ab79..39c2880d627b 100644 --- a/src/imperative/cached_op.cc +++ b/src/imperative/cached_op.cc @@ -32,6 +32,22 @@ DMLC_REGISTER_PARAMETER(CachedOpConfig); constexpr uint32_t kEidNotExist = std::numeric_limits::max(); +const char CachedOp::FULL[] = "full"; +const char CachedOp::FORWARD[] = "forward"; +const char CachedOp::BACKWARD[] = "backward"; +const char CachedOp::REF_COUNT[] = "ref_count"; +const char CachedOp::MEM_PLAN[] = "mem_plan"; +const char CachedOp::STORAGE_PLAN[] = "storage_plan"; + +namespace { + +std::string AddPrefix(const std::string& prefix, + const std::string& s) { + return prefix + "_" + s; +} + +} // namespace + struct CachedOp::GraphInfo { nnvm::Graph fwd_graph; nnvm::Graph full_graph; @@ -136,7 +152,7 @@ CachedOp::CachedOp( for (const auto& j : idx[i].inputs) ++ref_count[idx.entry_id(j)]; } - fwd_graph_.attrs["forward_ref_count"] = + fwd_graph_.attrs[AddPrefix(FORWARD, REF_COUNT)] = std::make_shared(std::move(ref_count)); inlining_ = !config_.static_alloc && @@ -201,9 +217,9 @@ CachedOp::CachedOp( } } - auto full_ref_count = fwd_graph_.GetAttr >("forward_ref_count"); + auto full_ref_count = fwd_graph_.GetAttr >(AddPrefix(FORWARD, REF_COUNT)); for (size_t i = 0; i < num_forward_entries; ++i) full_ref_count.at(i) += ref_count[i]; - fwd_graph_.attrs["full_ref_count"] = + fwd_graph_.attrs[AddPrefix(FULL, REF_COUNT)] = std::make_shared(std::move(full_ref_count)); size_t num_forward_inputs = num_inputs(); @@ -336,14 +352,15 @@ bool CachedOp::SetForwardGraph( // When dynmaic shape exists, it is not feasible to plan memory ahead of time if (contain_dynamic_shape) { - g.attrs.erase("forward_mem_plan"); - g.attrs.erase("full_mem_plan"); + g.attrs.erase(AddPrefix(FORWARD, MEM_PLAN)); + g.attrs.erase(AddPrefix(FULL, MEM_PLAN)); return false; } + const std::string& prefix = recording ? FULL : FORWARD; if (!match) { - g.attrs.erase("forward_mem_plan"); - g.attrs.erase("full_mem_plan"); - } else if (g.attrs.count(recording ? "full_mem_plan" : "forward_mem_plan")) { + g.attrs.erase(AddPrefix(FORWARD, MEM_PLAN)); + g.attrs.erase(AddPrefix(FULL, MEM_PLAN)); + } else if (g.attrs.count(AddPrefix(prefix, MEM_PLAN))) { return true; } @@ -363,9 +380,9 @@ bool CachedOp::SetForwardGraph( } auto mem_plan = PlanMemory( - &g, std::move(storage), g.GetAttr >( - recording ? "full_ref_count" : "forward_ref_count")); - g.attrs[recording ? 
"full_mem_plan" : "forward_mem_plan"] = + &g, std::move(storage), g.GetAttr >(AddPrefix(prefix, REF_COUNT)), + AddPrefix(prefix, STORAGE_PLAN)); + g.attrs[AddPrefix(prefix, MEM_PLAN)] = std::make_shared(std::move(mem_plan)); return false; @@ -432,7 +449,7 @@ bool CachedOp::SetBackwardGraph( size_t num_forward_nodes = fwd_graph_.indexed_graph().num_nodes(); size_t num_forward_entries = fwd_graph_.indexed_graph().num_node_entries(); - if (!g.attrs.count("backward_ref_count")) { + if (!g.attrs.count(AddPrefix(BACKWARD, REF_COUNT))) { std::vector ref_count(idx.num_node_entries(), 0); for (size_t i = num_forward_nodes; i < idx.num_nodes(); ++i) { for (const auto& j : idx[i].inputs) ++ref_count[idx.entry_id(j)]; @@ -443,7 +460,7 @@ bool CachedOp::SetBackwardGraph( } } for (const auto& i : idx.outputs()) ++ref_count[idx.entry_id(i)]; - g.attrs["backward_ref_count"] = std::make_shared(std::move(ref_count)); + g.attrs[AddPrefix(BACKWARD, REF_COUNT)] = std::make_shared(std::move(ref_count)); } auto shapes = info->fwd_graph.GetAttr("shape"); @@ -476,8 +493,8 @@ bool CachedOp::SetBackwardGraph( false, node_range, entry_range); if (!match) { - g.attrs.erase("backward_mem_plan"); - } else if (g.attrs.count("backward_mem_plan")) { + g.attrs.erase(AddPrefix(BACKWARD, MEM_PLAN)); + } else if (g.attrs.count(AddPrefix(BACKWARD, MEM_PLAN))) { return true; } @@ -491,11 +508,13 @@ bool CachedOp::SetBackwardGraph( for (const auto i : idx.outputs()) storage[idx.entry_id(i)] = exec::kExternalStorageID; auto mem_plan = PlanMemory( - &g, std::move(storage), g.GetAttr >("backward_ref_count"), + &g, std::move(storage), + g.GetAttr >(AddPrefix(BACKWARD, REF_COUNT)), + AddPrefix(BACKWARD, STORAGE_PLAN), {num_forward_nodes, idx.num_nodes()}, {num_forward_entries, idx.num_node_entries()}, detect_inplace_addto); - g.attrs["backward_mem_plan"] = std::make_shared(std::move(mem_plan)); + g.attrs[AddPrefix(BACKWARD, MEM_PLAN)] = std::make_shared(std::move(mem_plan)); return false; } @@ -526,9 +545,10 @@ void CachedOp::StaticAllocMemory( const auto& default_ctx = state.context; nnvm::Graph& g = keep_fwd ? state.info.full_graph : state.info.fwd_graph; const auto& idx = g.indexed_graph(); - const auto& vstorage_inplace = g.GetAttr >("storage_inplace_index"); - const auto& mem_plan = g.GetAttr( - keep_fwd ? "backward_mem_plan" : (recording ? "full_mem_plan" : "forward_mem_plan")); + const std::string& graph_type = keep_fwd ? BACKWARD : (recording ? FULL : FORWARD); + const auto& storage_plan_attr = AddPrefix(graph_type, STORAGE_PLAN); + const auto& storage_plan = g.GetAttr >(storage_plan_attr); + const auto& mem_plan = g.GetAttr(AddPrefix(graph_type, MEM_PLAN)); std::vector addto_entry; if (g.attrs.count("addto_entry")) { addto_entry = g.GetAttr >("addto_entry"); @@ -558,9 +578,9 @@ void CachedOp::StaticAllocMemory( for (size_t i = start_eid; i < end_eid; ++i) { if (addto_entry.size() && addto_entry[i]) { state.array_reqs[i] = kAddTo; - } else if (vstorage_inplace[i] >= 0) { + } else if (storage_plan[i] >= 0) { state.array_reqs[i] = kWriteInplace; - } else if (vstorage_inplace[i] == -2) { + } else if (storage_plan[i] == -2) { // -2 indicate that the entry is never referenced. state.array_reqs[i] = kNullOp; } else { @@ -862,8 +882,9 @@ OpStatePtr CachedOp::DynamicForward( } // Allocate NDArrays - std::vector ref_count = g.GetAttr >( - recording ? "full_ref_count" : "forward_ref_count"); + const std::string& graph_type = recording ? 
FULL : FORWARD; + std::vector ref_count = + g.GetAttr >(AddPrefix(graph_type, REF_COUNT)); std::vector array_reqs(arrays.size(), kWriteTo); for (size_t i = 0; i < idx.num_node_entries(); ++i) { @@ -871,8 +892,7 @@ OpStatePtr CachedOp::DynamicForward( } const auto& dispatch_modes = g.GetAttr("dispatch_mode"); if (!use_naive_run) { - const auto& mem_plan = g.GetAttr( - recording ? "full_mem_plan" : "forward_mem_plan"); + const auto& mem_plan = g.GetAttr(AddPrefix(graph_type, MEM_PLAN)); AllocateMemory(g, idx, default_ctx, 0, idx.num_node_entries(), mem_plan, arrays, &array_reqs); const auto& dtypes = g.GetAttr("dtype"); @@ -1011,7 +1031,7 @@ void CachedOp::DynamicBackward( } // Allocate NDArrays - auto ref_count = g.GetAttr >("backward_ref_count"); + auto ref_count = g.GetAttr >(AddPrefix(BACKWARD, REF_COUNT)); if (retain_graph) { for (size_t i = 0; i < num_forward_entries; ++i) ++ref_count[i]; } @@ -1027,7 +1047,7 @@ void CachedOp::DynamicBackward( if (ref_count[i] == 0) array_reqs[i] = kNullOp; } - const auto& mem_plan = g.GetAttr("backward_mem_plan"); + const auto& mem_plan = g.GetAttr(AddPrefix(BACKWARD, MEM_PLAN)); AllocateMemory(g, idx, default_ctx, num_forward_entries, idx.num_node_entries(), mem_plan, arrays, &array_reqs); diff --git a/src/imperative/cached_op.h b/src/imperative/cached_op.h index db049d59ed80..84f96300c27b 100644 --- a/src/imperative/cached_op.h +++ b/src/imperative/cached_op.h @@ -140,6 +140,13 @@ class CachedOp { void RegisterOpHook(const CachedOp::CachedOpMonCallback& callback, bool monitor_all = false); + static const char FULL[]; + static const char FORWARD[]; + static const char BACKWARD[]; + static const char REF_COUNT[]; + static const char MEM_PLAN[]; + static const char STORAGE_PLAN[]; + private: struct GraphInfo; struct DynamicRuntime; diff --git a/src/imperative/imperative_utils.h b/src/imperative/imperative_utils.h index 356b85e67ee2..64034dafb4d5 100644 --- a/src/imperative/imperative_utils.h +++ b/src/imperative/imperative_utils.h @@ -834,6 +834,7 @@ inline MemoryPlanVector PlanMemory( nnvm::Graph* p_g, nnvm::StorageVector&& storage, const std::vector& ref_count, + const std::string& storage_plan, const std::pair& node_range = {0, 0}, const std::pair& entry_range = {0, 0}, bool detect_inplace_addto = false) { @@ -851,6 +852,7 @@ inline MemoryPlanVector PlanMemory( const auto& dtypes = g.GetAttr("dtype"); const auto& shapes = g.GetAttr("shape"); const auto& storage_inplace = g.GetAttr >("storage_inplace_index"); + g.attrs[storage_plan] = std::make_shared(storage_inplace); const auto& storage_ids = g.GetAttr("storage_id"); uint32_t entry_start = entry_range.first; uint32_t entry_end = diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc index e1374ecdb9dd..3feccf55b734 100644 --- a/src/ndarray/ndarray.cc +++ b/src/ndarray/ndarray.cc @@ -1616,12 +1616,13 @@ void NDArray::Save(dmlc::Stream *strm) const { nd_cpu.WaitToRead(); save_data = nd_cpu.data(); } else { +#if MXNET_USE_MKLDNN == 1 + // For mkldnn, a copy of *this can ensure no write access pending on *this. 
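The hard-coded attribute strings that `CachedOp` previously used ("forward_ref_count", "full_mem_plan", and so on) are now composed by the `AddPrefix` helper from the graph-type and attribute-name constants declared in cached_op.h. A tiny Python sketch of the resulting key scheme, purely illustrative:

```python
# Mirrors cached_op.cc's AddPrefix(prefix, s) -> prefix + "_" + s.
GRAPH_TYPES = ("full", "forward", "backward")   # CachedOp::FULL/FORWARD/BACKWARD
ATTRS = ("ref_count", "mem_plan", "storage_plan")  # REF_COUNT/MEM_PLAN/STORAGE_PLAN

def add_prefix(prefix, s):
    return prefix + "_" + s

# Every graph attribute the CachedOp touches is one of these nine keys.
keys = [add_prefix(g, a) for g in GRAPH_TYPES for a in ATTRS]
print(keys[:3])  # ['full_ref_count', 'full_mem_plan', 'full_storage_plan']
```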
+ nd_cpu = this->Copy(Context::CPU()); + nd_cpu.WaitToRead(); +#else this->WaitToRead(); nd_cpu = *this; -#if MXNET_USE_MKLDNN == 1 - if (nd_cpu.IsMKLDNNData()) { - nd_cpu = nd_cpu.Reorder2Default(); - } #endif save_data = nd_cpu.data(); } @@ -1714,7 +1715,8 @@ bool NDArray::Load(dmlc::Stream *strm) { " Please turn on np shape semantics in Python using `with np_shape(True)`" " or decorator `use_np_shape` to scope the code of loading the ndarray."; } else { - CHECK(!Imperative::Get()->is_np_shape()) + // when the flag is global on, skip the check since it would be always global on. + CHECK(Imperative::Get()->is_np_shape() == GlobalOn || !Imperative::Get()->is_np_shape()) << "ndarray was not saved in np shape semantics, but being loaded in np shape semantics." " Please turn off np shape semantics in Python using `with np_shape(False)`" " to scope the code of loading the ndarray."; @@ -2005,16 +2007,18 @@ void NDArray::SyncCopyToCPU(void *data, size_t size) const { TBlob dst(data, dshape, cpu::kDevMask, this->dtype_, 0); // NOLINT(*) if (this->ctx().dev_mask() == cpu::kDevMask) { - this->WaitToRead(); - RunContext rctx{this->ctx(), nullptr, nullptr, false}; - NDArray src = *this; + Engine::Get()->PushAsync( + [&](RunContext rctx, Engine::CallbackOnComplete on_complete) { + RunContext ctx{this->ctx(), nullptr, nullptr, false}; + NDArray src = *this; #if MXNET_USE_MKLDNN == 1 - if (src.IsMKLDNNData()) { - src = this->Reorder2Default(); - } + src = this->Reorder2Default(); #endif - ndarray::Copy(src.data(), &dst, - Context::CPU(), Context::CPU(), rctx); + ndarray::Copy(src.data(), &dst, Context::CPU(), Context::CPU(), ctx); + on_complete(); + }, + this->ctx(), {this->var()}, {}, FnProperty::kNormal, 0, "SyncCopyCPU2CPU"); + this->WaitToWrite(); } else { #if MXNET_USE_CUDA Engine::Get()->PushAsync( diff --git a/src/operator/quantization/dequantize.cc b/src/operator/quantization/dequantize.cc index e8e2cd90b86c..9ce135040fb4 100644 --- a/src/operator/quantization/dequantize.cc +++ b/src/operator/quantization/dequantize.cc @@ -43,8 +43,6 @@ bool DequantizeStorageType(const nnvm::NodeAttrs& attrs, } #endif (*out_attrs)[0] = kDefaultStorage; - (*out_attrs)[1] = kDefaultStorage; - (*out_attrs)[2] = kDefaultStorage; return true; } diff --git a/src/operator/subgraph/build_subgraph.cc b/src/operator/subgraph/build_subgraph.cc index d43647ac83b9..0f4c570331a2 100644 --- a/src/operator/subgraph/build_subgraph.cc +++ b/src/operator/subgraph/build_subgraph.cc @@ -318,8 +318,8 @@ void PreSelectSubgraphNodes(const nnvm::Graph& g, SubgraphSelectorV2Ptr subgraph for (auto node : excluded_nodes) { excluded_node_names += node->node->attrs.name + ", "; } - static bool verbose = dmlc::GetEnv("MXNET_SUBGRAPH_VERBOSE", false); - if (verbose) { + static int verbose = dmlc::GetEnv("MXNET_SUBGRAPH_VERBOSE", 1); + if (verbose > 1) { LOG(INFO) << "Found a cycle when BFS from node " << simple_nodes[snid]->node->attrs.name << ". Excluding nodes " << excluded_node_names << "and retrying"; } @@ -706,9 +706,9 @@ void TopSortEntries(const nnvm::Graph& g, } nnvm::Graph BuildSubgraph(nnvm::Graph&& g) { - static bool verbose = dmlc::GetEnv("MXNET_SUBGRAPH_VERBOSE", false); + static int verbose = dmlc::GetEnv("MXNET_SUBGRAPH_VERBOSE", 1); if (!g.HasAttr("subgraph_property")) { // treat the whole graph as a subgraph - if (verbose) { + if (verbose > 1) { LOG(INFO) << "The graph has no attribute of subgraph_property attached. 
" "The original graph is returned."; } diff --git a/src/operator/tensor/dot-inl.h b/src/operator/tensor/dot-inl.h index 96c869f40d40..8405404dc627 100644 --- a/src/operator/tensor/dot-inl.h +++ b/src/operator/tensor/dot-inl.h @@ -30,6 +30,7 @@ #include #include #include + #include "./util/tensor_util-inl.h" #include "../mshadow_op.h" #include "../elemwise_op_common.h" @@ -1353,6 +1354,7 @@ void BatchDotForward_(const nnvm::NodeAttrs& attrs, using namespace mshadow; using namespace mshadow::expr; mshadow::Stream *s = ctx.get_stream(); + if (req[0] == kNullOp) return; const DotParam& param = nnvm::get(attrs.parsed); CHECK_EQ(outputs[0].type_flag_, inputs[0].type_flag_) << "Binary function only support input/output with the same type"; @@ -1362,115 +1364,46 @@ void BatchDotForward_(const nnvm::NodeAttrs& attrs, (outputs[0].type_flag_ == kFloat16 && ctx.run_ctx.ctx.dev_mask() == mshadow::gpu::kDevMask)) << "dot only supports float32/float64 for CPU, and float16/float32/float64 for GPU"; MSHADOW_REAL_TYPE_SWITCH(outputs[0].type_flag_, DType, { - mshadow::Tensor out = outputs[0].get(s); - mshadow::Tensor mlhs = inputs[0].get(s); - mshadow::Tensor mrhs = inputs[1].get(s); - mshadow::Tensor workspace = - ctx.requested[0].get_space_typed(mshadow::Shape1(3 * out.size(0)), s); - if (kNullOp != req[0]) { - if (param.transpose_a && param.transpose_b) { - mshadow::BatchGEMM(out, mlhs, mrhs, (DType)1.0f, - (kAddTo == req[0]) ? (DType)1.0f : (DType)0.0f, - workspace); - } else if (!param.transpose_a && param.transpose_b) { - mshadow::BatchGEMM(out, mlhs, mrhs, (DType)1.0f, - (kAddTo == req[0]) ? (DType)1.0f : (DType)0.0f, - workspace); - } else if (param.transpose_a && !param.transpose_b) { - mshadow::BatchGEMM(out, mlhs, mrhs, (DType)1.0f, - (kAddTo == req[0]) ? (DType)1.0f : (DType)0.0f, - workspace); - } else { - mshadow::BatchGEMM(out, mlhs, mrhs, (DType)1.0f, - (kAddTo == req[0]) ? 
(DType)1.0f : (DType)0.0f, - workspace); + int ndim = outputs[0].ndim(); + if (outputs[0].shape_.Size() == 0 || inputs[0].shape_.Size() == 0 + || inputs[1].shape_.Size() == 0) { + if (outputs[0].shape_.Size() != 0 && req[0] != kAddTo) { + mxnet_op::Kernel::Launch(s, outputs[0].shape_.Size(), + outputs[0].dptr()); } + return; } - }); -} - -template -void BatchDotBackward_(const nnvm::NodeAttrs& attrs, - const OpContext& ctx, - const std::vector& inputs, - const std::vector& req, - const std::vector& outputs) { - using namespace mshadow; - using namespace mshadow::expr; - mshadow::Stream *s = ctx.get_stream(); - const DotParam& param = nnvm::get(attrs.parsed); - CHECK_NE(req[1], kWriteInplace); - CHECK_NE(req[0], kWriteInplace); - CHECK(outputs[0].type_flag_ == kFloat32 || outputs[0].type_flag_ == kFloat64 || - (outputs[0].type_flag_ == kFloat16 && ctx.run_ctx.ctx.dev_mask() == mshadow::gpu::kDevMask)) - << "dot only supports float32/float64 for CPU, and float16/float32/float64 for GPU"; - MSHADOW_REAL_TYPE_SWITCH(outputs[0].type_flag_, DType, { - mshadow::Tensor mout_grad = inputs[0].get(s); - mshadow::Tensor mlhs_data = inputs[1].get(s); - mshadow::Tensor mrhs_data = inputs[2].get(s); - mshadow::Tensor mlhs_grad = outputs[0].get(s); - mshadow::Tensor mrhs_grad = outputs[1].get(s); - mshadow::Tensor workspace = - ctx.requested[0].get_space_typed( - mshadow::Shape2(2, 3 * mout_grad.size(0)), s); - mshadow::Tensor rhs_workspace = workspace[0]; - mshadow::Tensor lhs_workspace = workspace[1]; + size_t batch_size = outputs[0].shape_.ProdShape(0, ndim - 2); + mshadow::Tensor out = + outputs[0].get_with_shape(Shape3(batch_size, + outputs[0].shape_[ndim - 2], + outputs[0].shape_[ndim - 1]), s); + mshadow::Tensor mlhs = + inputs[0].get_with_shape(Shape3(batch_size, + inputs[0].shape_[ndim - 2], + inputs[0].shape_[ndim - 1]), s); + mshadow::Tensor mrhs = + inputs[1].get_with_shape(Shape3(batch_size, + inputs[1].shape_[ndim - 2], + inputs[1].shape_[ndim - 1]), s); + mshadow::Tensor workspace = + ctx.requested[0].get_space_typed(mshadow::Shape1(3 * out.size(0)), s); if (param.transpose_a && param.transpose_b) { - // Gradient of z = dot(x.T, y.T) - // dy = dot(x, dz).T = dot(dz.T, x.T) - // dx = dot(dz, y).T = dot(y.T, dz.T) - if (kNullOp != req[1]) { - mshadow::BatchGEMM(mrhs_grad, mout_grad, mlhs_data, (DType)1.0f, - (kAddTo == req[1]) ? (DType)1.0f : (DType)0.0f, - rhs_workspace); - } - if (kNullOp != req[0]) { - mshadow::BatchGEMM(mlhs_grad, mrhs_data, mout_grad, (DType)1.0f, - (kAddTo == req[0]) ? (DType)1.0f : (DType)0.0f, - lhs_workspace); - } + mshadow::BatchGEMM(out, mlhs, mrhs, (DType)1.0f, + (kAddTo == req[0]) ? (DType)1.0f : (DType)0.0f, + workspace); } else if (!param.transpose_a && param.transpose_b) { - // Gradient of z = dot(x, y.T) - // dy = dot(x.T, dz).T = dot(dz.T, x) - // dx = dot(dz, y) - if (kNullOp != req[1]) { - mshadow::BatchGEMM(mrhs_grad, mout_grad, mlhs_data, (DType)1.0f, - (kAddTo == req[1]) ? (DType)1.0f : (DType)0.0f, - rhs_workspace); - } - if (kNullOp != req[0]) { - mshadow::BatchGEMM(mlhs_grad, mout_grad, mrhs_data, (DType)1.0f, - (kAddTo == req[0]) ? (DType)1.0f : (DType)0.0f, - lhs_workspace); - } + mshadow::BatchGEMM(out, mlhs, mrhs, (DType)1.0f, + (kAddTo == req[0]) ? 
(DType)1.0f : (DType)0.0f, + workspace); } else if (param.transpose_a && !param.transpose_b) { - // Gradient of z = dot(x.T, y) - // dy = dot(x, dz) - // dx = dot(dz, y.T).T = dot(y, dz.T) - if (kNullOp != req[1]) { - mshadow::BatchGEMM(mrhs_grad, mlhs_data, mout_grad, (DType)1.0f, - (kAddTo == req[1]) ? (DType)1.0f : (DType)0.0f, - rhs_workspace); - } - if (kNullOp != req[0]) { - mshadow::BatchGEMM(mlhs_grad, mrhs_data, mout_grad, (DType)1.0f, - (kAddTo == req[0]) ? (DType)1.0f : (DType)0.0f, - lhs_workspace); - } + mshadow::BatchGEMM(out, mlhs, mrhs, (DType)1.0f, + (kAddTo == req[0]) ? (DType)1.0f : (DType)0.0f, + workspace); } else { - // Gradient of z = dot(x, y) - // dy = dot(x.T, dz) - // dx = dot(dz, y.T) - if (kNullOp != req[1]) { - mshadow::BatchGEMM(mrhs_grad, mlhs_data, mout_grad, (DType)1.0f, - (kAddTo == req[1]) ? (DType)1.0f : (DType)0.0f, - rhs_workspace); - } - if (kNullOp != req[0]) { - mshadow::BatchGEMM(mlhs_grad, mout_grad, mrhs_data, (DType)1.0f, - (kAddTo == req[0]) ? (DType)1.0f : (DType)0.0f, - lhs_workspace); - } + mshadow::BatchGEMM(out, mlhs, mrhs, (DType)1.0f, + (kAddTo == req[0]) ? (DType)1.0f : (DType)0.0f, + workspace); } }); } @@ -1485,24 +1418,34 @@ inline bool BatchDotShape(const nnvm::NodeAttrs& attrs, mxnet::TShape& rshape = (*in_attrs)[1]; // return false if lhs and rhs both have fully unknown shape if (!ndim_is_known(lshape) || !ndim_is_known(rshape)) return false; - if (lshape.ndim() == 3 && rshape.ndim() == 3) { + if (lshape.ndim() >= 3 && rshape.ndim() >= 3 && lshape.ndim() == rshape.ndim()) { + int ndim = lshape.ndim(); // only partially infer shape if last dim of lhs and second dim of rhs is known - bool last_dim_known = dim_size_is_known(lshape, 2); - bool second_dim_known = dim_size_is_known(rshape, 1); + bool last_dim_known = dim_size_is_known(lshape, ndim - 1); + bool second_dim_known = dim_size_is_known(rshape, ndim - 2); if ( !last_dim_known || !second_dim_known) return false; - CHECK(lshape[0] == rshape[0]) - << "batch_dot shape error(batch_size must be equal): " << lshape << " X " << rshape - << " trans_a=" << param.transpose_a << " trans_b=" << param.transpose_b; - index_t out_m = param.transpose_a ? lshape[2] : lshape[1]; - index_t lshape_k = param.transpose_a ? lshape[1] : lshape[2]; - index_t out_n = param.transpose_b ? rshape[1] : rshape[2]; - index_t rshape_k = param.transpose_b ? rshape[2] : rshape[1]; - CHECK(lshape_k == rshape_k) - << "batch_dot shape error(shape mismatch): " << lshape << " X " << rshape + for (int i = 0; i < ndim - 2; i++) { + CHECK_EQ(lshape[i], rshape[i]) + << "batch_dot shape error (the leading batch dimensions must be equal): " + << lshape << " X " << rshape + << " trans_a=" << param.transpose_a << " trans_b=" << param.transpose_b; + } + dim_t out_m = param.transpose_a ? lshape[ndim - 1] : lshape[ndim - 2]; + dim_t lshape_k = param.transpose_a ? lshape[ndim - 2] : lshape[ndim - 1]; + dim_t out_n = param.transpose_b ? rshape[ndim - 2] : rshape[ndim - 1]; + dim_t rshape_k = param.transpose_b ? 
rshape[ndim - 1] : rshape[ndim - 2]; + CHECK_EQ(lshape_k, rshape_k) + << "batch_dot shape error (shape mismatch): " << lshape << " X " << rshape << " trans_a=" << param.transpose_a << " trans_b=" << param.transpose_b; - SHAPE_ASSIGN_CHECK(*out_attrs, 0, mshadow::Shape3(lshape[0], out_m, out_n)); + std::vector out_shape_vec; + for (int i = 0; i < ndim - 2; i++) { + out_shape_vec.push_back(lshape[i]); + } + out_shape_vec.push_back(out_m); + out_shape_vec.push_back(out_n); + SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape(out_shape_vec)); } else { - LOG(FATAL) << "batch_dot currently only support 3D*3D array" + LOG(FATAL) << "batch_dot currently only support N-D*N-D array (N >= 3)" << lshape << " v.s. " << rshape; } // return true if output shape is fully inferred diff --git a/src/operator/tensor/dot.cc b/src/operator/tensor/dot.cc index 11a056146e1d..556260ed9600 100644 --- a/src/operator/tensor/dot.cc +++ b/src/operator/tensor/dot.cc @@ -115,13 +115,13 @@ NNVM_REGISTER_OP(batch_dot) .describe(R"doc(Batchwise dot product. ``batch_dot`` is used to compute dot product of ``x`` and ``y`` when ``x`` and -``y`` are data in batch, namely 3D arrays in shape of `(batch_size, :, :)`. +``y`` are data in batch, namely N-D (N >= 3) arrays in shape of `(B0, ..., B_i, :, :)`. -For example, given ``x`` with shape `(batch_size, n, m)` and ``y`` with shape -`(batch_size, m, k)`, the result array will have shape `(batch_size, n, k)`, +For example, given ``x`` with shape `(B_0, ..., B_i, N, M)` and ``y`` with shape +`(B_0, ..., B_i, M, K)`, the result array will have shape `(B_0, ..., B_i, N, K)`, which is computed by:: - batch_dot(x,y)[i,:,:] = dot(x[i,:,:], y[i,:,:]) + batch_dot(x,y)[b_0, ..., b_i, :, :] = dot(x[b_0, ..., b_i, :, :], y[b_0, ..., b_i, :, :]) )doc" ADD_FILELINE) .set_num_inputs(2) @@ -138,21 +138,73 @@ which is computed by:: return std::vector{ResourceRequest::kTempSpace}; }) .set_attr("FCompute", BatchDotForward_) -.set_attr("FGradient", ElemwiseGradUseIn{"_backward_batch_dot"}) +.set_attr("FGradient", + [](const nnvm::NodePtr& n, + const std::vector& ograds) { + const DotParam& param = nnvm::get(n->attrs.parsed); + nnvm::NodePtr lhs_grad; + nnvm::NodePtr rhs_grad; + std::string lhs_gnode_name = n->attrs.name + "_backward_lhs"; + std::string rhs_gnode_name = n->attrs.name + "_backward_rhs"; + if (param.transpose_a && param.transpose_b) { + // Gradient of z = dot(x.T, y.T) + // dx = dot(dz, y).T = dot(y.T, dz.T) + // dy = dot(x, dz).T = dot(dz.T, x.T) + lhs_grad = MakeNode("batch_dot", lhs_gnode_name, + {n->inputs[1], ograds[0]}, &(n->attrs.dict), &n); + rhs_grad = MakeNode("batch_dot", rhs_gnode_name, + {ograds[0], n->inputs[0]}, &(n->attrs.dict), &n); + } else if (!param.transpose_a && param.transpose_b) { + // Gradient of z = dot(x, y.T) + // dx = dot(dz, y) + // dy = dot(x.T, dz).T = dot(dz.T, x) + auto lhs_attrs_dict = n->attrs.dict; + auto rhs_attrs_dict = n->attrs.dict; + lhs_attrs_dict["transpose_a"] = "false"; + lhs_attrs_dict["transpose_b"] = "false"; + rhs_attrs_dict["transpose_a"] = "true"; + rhs_attrs_dict["transpose_b"] = "false"; + lhs_grad = MakeNode("batch_dot", lhs_gnode_name, + {ograds[0], n->inputs[1]}, &lhs_attrs_dict, &n); + rhs_grad = MakeNode("batch_dot", rhs_gnode_name, + {ograds[0], n->inputs[0]}, &rhs_attrs_dict, &n); + } else if (param.transpose_a && !param.transpose_b) { + // Gradient of z = dot(x.T, y) + // dx = dot(dz, y.T).T = dot(y, dz.T) + // dy = dot(x, dz) + auto lhs_attrs_dict = n->attrs.dict; + auto rhs_attrs_dict = n->attrs.dict; + 
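The new `FGradient` lambda above expresses both input gradients of `batch_dot` as further `batch_dot` nodes, using the transpose identities spelled out in its comments. A NumPy spot check of the identities for the default case `transpose_a=False, transpose_b=False`, where `dz` denotes the output gradient:

```python
import numpy as np

rng = np.random.default_rng(0)
x = rng.standard_normal((2, 3, 4))   # (B, N, M)
y = rng.standard_normal((2, 4, 5))   # (B, M, K)
dz = rng.standard_normal((2, 3, 5))  # gradient w.r.t. z = batch_dot(x, y)

# Gradient of z = dot(x, y):  dx = dot(dz, y.T),  dy = dot(x.T, dz)
dx = np.matmul(dz, y.swapaxes(-1, -2))
dy = np.matmul(x.swapaxes(-1, -2), dz)
assert dx.shape == x.shape and dy.shape == y.shape
```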
lhs_attrs_dict["transpose_a"] = "false"; + lhs_attrs_dict["transpose_b"] = "true"; + rhs_attrs_dict["transpose_a"] = "false"; + rhs_attrs_dict["transpose_b"] = "false"; + lhs_grad = MakeNode("batch_dot", lhs_gnode_name, + {n->inputs[1], ograds[0]}, &lhs_attrs_dict, &n); + rhs_grad = MakeNode("batch_dot", rhs_gnode_name, + {n->inputs[0], ograds[0]}, &rhs_attrs_dict, &n); + } else { + // Gradient of z = dot(x, y) + // dx = dot(dz, y.T) + // dy = dot(x.T, dz) + auto lhs_attrs_dict = n->attrs.dict; + auto rhs_attrs_dict = n->attrs.dict; + lhs_attrs_dict["transpose_a"] = "false"; + lhs_attrs_dict["transpose_b"] = "true"; + rhs_attrs_dict["transpose_a"] = "true"; + rhs_attrs_dict["transpose_b"] = "false"; + lhs_grad = MakeNode("batch_dot", lhs_gnode_name, + {ograds[0], n->inputs[1]}, &lhs_attrs_dict, &n); + rhs_grad = MakeNode("batch_dot", rhs_gnode_name, + {n->inputs[0], ograds[0]}, &rhs_attrs_dict, &n); + } + std::vector ret; + ret.emplace_back(nnvm::NodeEntry{lhs_grad, 0, 0}); + ret.emplace_back(nnvm::NodeEntry{rhs_grad, 0, 0}); + return ret; +}) .add_argument("lhs", "NDArray-or-Symbol", "The first input") .add_argument("rhs", "NDArray-or-Symbol", "The second input") .add_arguments(DotParam::__FIELDS__()); -NNVM_REGISTER_OP(_backward_batch_dot) -.set_num_inputs(3) -.set_num_outputs(2) -.set_attr_parser(ParamParser) -.set_attr("FResourceRequest", - [](const NodeAttrs& attrs) { - return std::vector{ResourceRequest::kTempSpace}; - }) -.set_attr("TIsBackward", true) -.set_attr("FCompute", BatchDotBackward_); - } // namespace op } // namespace mxnet diff --git a/src/operator/tensor/dot.cu b/src/operator/tensor/dot.cu index 8ee2e2832fbb..b245b1c9e5ed 100644 --- a/src/operator/tensor/dot.cu +++ b/src/operator/tensor/dot.cu @@ -38,8 +38,5 @@ NNVM_REGISTER_OP(_backward_dot) NNVM_REGISTER_OP(batch_dot) .set_attr("FCompute", BatchDotForward_); -NNVM_REGISTER_OP(_backward_batch_dot) -.set_attr("FCompute", BatchDotBackward_); - } // namespace op } // namespace mxnet diff --git a/tests/nightly/test_large_array.py b/tests/nightly/test_large_array.py index 0cb21cedee35..c18a95400f22 100644 --- a/tests/nightly/test_large_array.py +++ b/tests/nightly/test_large_array.py @@ -1415,10 +1415,10 @@ def check_arcsinh(): assert_correctness_of_trigonometric_ops(y, expected_output) def check_arccosh(): - x = create_input_for_trigonometric_ops([1, np.pi/2, 3*np.pi/4, np.pi]) + x = create_input_for_trigonometric_ops([1, np.pi/2, 3*np.pi/4, np.pi, 5*np.pi/4]) y = nd.arccosh(x) # expected ouput for indices=(0, 1, -3, -2, -1) after applying arccosh() - expected_output = [0, np.arccosh(np.pi/2), np.arccosh(3*np.pi/4), np.arccosh(np.pi)] + expected_output = [0, np.arccosh(np.pi/2), np.arccosh(3*np.pi/4), np.arccosh(np.pi), np.arccosh(5*np.pi/4)] assert_correctness_of_trigonometric_ops(y, expected_output) def check_arctanh(): diff --git a/tests/nightly/test_large_vector.py b/tests/nightly/test_large_vector.py index 23f4b8e4f310..b8edc83220bd 100644 --- a/tests/nightly/test_large_vector.py +++ b/tests/nightly/test_large_vector.py @@ -556,8 +556,8 @@ def test_concat(): a = nd.ones(LARGE_X) b = nd.zeros(LARGE_X) c = nd.concat(a, b, dim=0) - assert c[0][0] == 1 - assert c[-1][-1] == 0 + assert c[0] == 1 + assert c[-1] == 0 assert c.shape[0] == (2 * LARGE_X) @@ -710,6 +710,37 @@ def test_full(): assert a[-1] == 3 +def test_sign(): + a = mx.nd.random.normal(-1, 1, shape=LARGE_X) + mx_res = mx.nd.sign(a) + assert_almost_equal(mx_res[-1].asnumpy(), np.sign(a[-1].asnumpy())) + + +def test_logical(): + def check_logical_and(a, b): + 
mx_res = mx.nd.logical_and(a, b) + assert_almost_equal(mx_res[-1].asnumpy(), np.logical_and(a[-1].asnumpy(), b[-1].asnumpy())) + + def check_logical_or(a, b): + mx_res = mx.nd.logical_or(a, b) + assert_almost_equal(mx_res[-1].asnumpy(), np.logical_or(a[-1].asnumpy(), b[-1].asnumpy())) + + def check_logical_not(a, b): + mx_res = mx.nd.logical_not(a) + assert_almost_equal(mx_res[-1].asnumpy(), np.logical_not(a[-1].asnumpy())) + + def check_logical_xor(a, b): + mx_res = mx.nd.logical_xor(a, b) + assert_almost_equal(mx_res[-1].asnumpy(), np.logical_xor(a[-1].asnumpy(), b[-1].asnumpy())) + + a = mx.nd.ones(LARGE_X) + b = mx.nd.zeros(LARGE_X) + check_logical_and(a, b) + check_logical_or(a, b) + check_logical_not(a, b) + check_logical_xor(a, b) + + def test_astype(): x = create_vector(size=LARGE_X//4) x = nd.tile(x, 4) @@ -752,7 +783,7 @@ def assert_correctness_of_rounding_ops(output, mid, expected_vals): def test_rounding_ops(): x = create_input_for_rounding_ops() - + def check_ceil(): y = nd.ceil(x) # expected ouput for middle 5 values after applying ceil() @@ -854,6 +885,48 @@ def check_tan(): expected_output = [-.577, -1, 0, 1, .577] assert_correctness_of_trigonometric_ops(y, expected_output) + def check_arcsinh(): + x = create_input_for_trigonometric_ops([-np.pi/2, -np.pi/4, 0, np.pi/4, np.pi/2]) + y = nd.arcsinh(x) + # expected output for indices=(0, 1, -3, -2, -1) after applying arcsinh() + expected_output = [np.arcsinh(-np.pi/2), np.arcsinh(-np.pi/4), 0, np.arcsinh(np.pi/4), np.arcsinh(np.pi/2)] + assert_correctness_of_trigonometric_ops(y, expected_output) + + def check_arccosh(): + x = create_input_for_trigonometric_ops([1, np.pi/2, 3*np.pi/4, np.pi, 5*np.pi/4]) + y = nd.arccosh(x) + # expected output for indices=(0, 1, -3, -2, -1) after applying arccosh() + expected_output = [0, np.arccosh(np.pi/2), np.arccosh(3*np.pi/4), np.arccosh(np.pi), np.arccosh(5*np.pi/4)] + assert_correctness_of_trigonometric_ops(y, expected_output) + + def check_arctanh(): + x = create_input_for_trigonometric_ops([-1/4, -1/2, 0, 1/4, 1/2]) + y = nd.arctanh(x) + # expected output for indices=(0, 1, -3, -2, -1) after applying arctanh() + expected_output = [np.arctanh(-1/4), np.arctanh(-1/2), 0, np.arctanh(1/4), np.arctanh(1/2)] + assert_correctness_of_trigonometric_ops(y, expected_output) + + def check_sinh(): + x = create_input_for_trigonometric_ops([-np.pi/2, -np.pi/4, 0, np.pi/4, np.pi/2]) + y = nd.sinh(x) + # expected output for indices=(0, 1, -3, -2, -1) after applying sinh() + expected_output = [np.sinh(-np.pi/2), np.sinh(-np.pi/4), 0, np.sinh(np.pi/4), np.sinh(np.pi/2)] + assert_correctness_of_trigonometric_ops(y, expected_output) + + def check_cosh(): + x = create_input_for_trigonometric_ops([0, 1, np.pi/2, 3*np.pi/4, np.pi]) + y = nd.cosh(x) + # expected output for indices=(0, 1, -3, -2, -1) after applying cosh() + expected_output = [1, np.cosh(1), np.cosh(np.pi/2), np.cosh(3*np.pi/4), np.cosh(np.pi)] + assert_correctness_of_trigonometric_ops(y, expected_output) + + def check_tanh(): + x = create_input_for_trigonometric_ops([-1/4, -1/2, 0, 1/4, 1/2]) + y = nd.tanh(x) + # expected output for indices=(0, 1, -3, -2, -1) after applying tanh() + expected_output = [np.tanh(-1/4), np.tanh(-1/2), 0, np.tanh(1/4), np.tanh(1/2)] + assert_correctness_of_trigonometric_ops(y, expected_output) + def check_radians(): x = create_input_for_trigonometric_ops([0, 90, 180, 270, 360]) y = nd.radians(x) @@ -874,6 +947,12 @@ def check_degrees(): check_sin() check_cos() check_tan() + check_arcsinh() + 
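The new inverse hyperbolic checks pick their inputs to stay inside each function's domain: `arccosh` requires `x >= 1` and `arctanh` requires `|x| < 1`, while `arcsinh` accepts any real input. A quick NumPy illustration of why those particular input lists were chosen:

```python
import numpy as np

print(np.arccosh(np.array([1.0, np.pi / 2])))  # fine: inputs >= 1
print(np.arctanh(np.array([-0.5, 0.25])))      # fine: |x| < 1
with np.errstate(invalid='ignore'):
    print(np.arccosh(np.array([0.5])))         # nan: outside the domain
```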
check_arccosh() + check_arctanh() + check_sinh() + check_cosh() + check_tanh() check_radians() check_degrees() diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py index b764ac73d30c..ae8ad621df75 100644 --- a/tests/python/unittest/test_numpy_op.py +++ b/tests/python/unittest/test_numpy_op.py @@ -24,6 +24,7 @@ import platform import mxnet as mx import scipy.stats as ss +from nose.tools import assert_raises from mxnet import np, npx from mxnet.gluon import HybridBlock from mxnet.base import MXNetError @@ -901,6 +902,124 @@ def hybrid_forward(self, F, a): expected_grad[basic_index] = 1 assert same(a.grad.asnumpy(), expected_grad) +@with_seed() +@use_np +def test_npx_batch_dot(): + ctx = mx.context.current_context() + dtypes = ['float32', 'float64'] + if ctx.device_type == 'gpu': + dtypes += ['float16'] + eps_dict = {'float32': 1E-4, 'float64': 1E-4, 'float16': 1E-3} + class TestBatchDot(HybridBlock): + def __init__(self, transpose_a, transpose_b): + super(TestBatchDot, self).__init__() + self._transpose_a = transpose_a + self._transpose_b = transpose_b + + def hybrid_forward(self, F, lhs, rhs): + return F.npx.batch_dot(lhs, rhs, + transpose_a=self._transpose_a, + transpose_b=self._transpose_b) + + def batch_dot_numpy(lhs, rhs, transpose_a, transpose_b): + assert lhs.ndim == rhs.ndim >= 3 + if transpose_a: + lhs = lhs.swapaxes(-1, -2) + if transpose_b: + rhs = rhs.swapaxes(-1, -2) + return _np.matmul(lhs, rhs) + + def gt_grad_batch_dot_numpy(lhs, rhs, ograd, transpose_a, transpose_b, lhs_req, rhs_req, + init_lhs_grad, init_rhs_grad): + + if transpose_a and transpose_b: + # Gradient of z = dot(x.T, y.T) + # dx = dot(dz, y).T = dot(y.T, dz.T) + # dy = dot(x, dz).T = dot(dz.T, x.T) + lhs_grad = batch_dot_numpy(rhs, ograd, transpose_a=True, transpose_b=True) + rhs_grad = batch_dot_numpy(ograd, lhs, transpose_a=True, transpose_b=True) + elif not transpose_a and transpose_b: + # Gradient of z = dot(x, y.T) + # dx = dot(dz, y) + # dy = dot(x.T, dz).T = dot(dz.T, x) + lhs_grad = batch_dot_numpy(ograd, rhs, transpose_a=False, transpose_b=False) + rhs_grad = batch_dot_numpy(ograd, lhs, transpose_a=True, transpose_b=False) + elif transpose_a and not transpose_b: + # Gradient of z = dot(x.T, y) + # dx = dot(dz, y.T).T = dot(y, dz.T) + # dy = dot(x, dz) + lhs_grad = batch_dot_numpy(rhs, ograd, transpose_a=False, transpose_b=True) + rhs_grad = batch_dot_numpy(lhs, ograd, transpose_a=False, transpose_b=False) + else: + # Gradient of z = dot(x, y) + # dx = dot(dz, y.T) + # dy = dot(x.T, dz) + lhs_grad = batch_dot_numpy(ograd, rhs, transpose_a=False, transpose_b=True) + rhs_grad = batch_dot_numpy(lhs, ograd, transpose_a=True, transpose_b=False) + if lhs_req == 'add': + lhs_grad += init_lhs_grad + if rhs_req == 'add': + rhs_grad += init_rhs_grad + return lhs_grad, rhs_grad + + + configs = [ + ((2, 3, 0), (2, 4, 0), False, True), + ((2, 4, 3), (2, 4, 3), True, False), + ((0, 3, 0), (0, 0, 2), False, False), + ((3, 2, 3, 2), (3, 2, 2, 3), True, True), + ((3, 1, 5, 2), (3, 1, 2, 1), False, False) + ] + bad_configs = [ + ((5, 3, 2), (5, 1, 3), False, False), + ((2, 5, 3, 1), (2, 4, 3, 1), True, False) + ] + for hybridize in [True, False]: + for lhs_shape, rhs_shape, transpose_a, transpose_b in configs: + for dtype in dtypes: + eps = eps_dict[dtype] + for lhs_grad_req in ['write', 'add']: + for rhs_grad_req in ['write', 'add']: + f_batch_dot = TestBatchDot(transpose_a=transpose_a, + transpose_b=transpose_b) + if hybridize: + f_batch_dot.hybridize() + lhs_val = 
mx.np.array(_np.random.uniform(-1.0, 1.0, lhs_shape), dtype=dtype) + rhs_val = mx.np.array(_np.random.uniform(-1.0, 1.0, rhs_shape), dtype=dtype) + lhs_val.attach_grad(grad_req=lhs_grad_req) + rhs_val.attach_grad(grad_req=rhs_grad_req) + gt_out = batch_dot_numpy(lhs_val.asnumpy(), rhs_val.asnumpy(), + transpose_a, transpose_b) + init_lhs_grad = mx.np.random.uniform(-1.0, 1.0, lhs_shape, dtype=dtype) + init_rhs_grad = mx.np.random.uniform(-1.0, 1.0, rhs_shape, dtype=dtype) + o_grad = mx.np.random.uniform(-1.0, 1.0, gt_out.shape, dtype=dtype) + if lhs_grad_req == 'add': + lhs_val.grad[:] = init_lhs_grad + if rhs_grad_req == 'add': + rhs_val.grad[:] = init_rhs_grad + with mx.autograd.record(): + out = f_batch_dot(lhs_val, rhs_val) + out.backward(o_grad) + assert_almost_equal(out.asnumpy(), gt_out, rtol=eps, atol=eps) + gt_lhs_grad, gt_rhs_grad = gt_grad_batch_dot_numpy(lhs_val.asnumpy(), + rhs_val.asnumpy(), + o_grad.asnumpy(), + transpose_a=transpose_a, + transpose_b=transpose_b, + lhs_req=lhs_grad_req, + rhs_req=rhs_grad_req, + init_lhs_grad=init_lhs_grad.asnumpy(), + init_rhs_grad=init_rhs_grad.asnumpy()) + assert_almost_equal(lhs_val.grad.asnumpy(), gt_lhs_grad, rtol=eps, atol=eps) + assert_almost_equal(rhs_val.grad.asnumpy(), gt_rhs_grad, rtol=eps, atol=eps) + for lhs_shape, rhs_shape, transpose_a, transpose_b in bad_configs: + for dtype in dtypes: + lhs_val = mx.np.array(_np.random.uniform(-1.0, 1.0, lhs_shape), dtype=dtype) + rhs_val = mx.np.array(_np.random.uniform(-1.0, 1.0, rhs_shape), dtype=dtype) + assert_raises(MXNetError, lambda: mx.npx.batch_dot(lhs_val, rhs_val, + transpose_a=transpose_a, + transpose_b=transpose_b)) + @with_seed() @use_np diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py index 7ea106b2620f..dde28fdb766f 100644 --- a/tests/python/unittest/test_operator.py +++ b/tests/python/unittest/test_operator.py @@ -2964,6 +2964,7 @@ def test_big_transpose(): assert_allclose(x_np, z.asnumpy().astype('uint8')) +@with_seed() def test_larger_transpose(): x = mx.nd.random.normal(shape=(50,51)) y = mx.nd.transpose(x) @@ -3324,9 +3325,9 @@ def test_batch_dot(): agrad_npy = np.empty((batch_size, m, k), dtype=data_type) bgrad_npy = np.empty((batch_size, k, n), dtype=data_type) a_init_grad_npy = np.random.normal(size=(batch_size, m, k)) - a_init_grad_npy = a_npy.astype(data_type) + a_init_grad_npy = a_init_grad_npy.astype(data_type) b_init_grad_npy = np.random.normal(size=(batch_size, k, n)) - b_init_grad_npy = b_npy.astype(data_type) + b_init_grad_npy = b_init_grad_npy.astype(data_type) for i in range(batch_size): c_npy[i, :, :] = np.dot(a_npy[i, :, :], b_npy[i, :, :]) bgrad_npy[i, :, :] = np.dot(a_npy[i, :, :].T, ograd_npy[i, :, :])
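Both the new `test_npx_batch_dot` and the corrected initial-gradient setup in `test_batch_dot` exercise the same `grad_req='add'` pattern: seed the gradient buffer, run backward, and expect the seed plus the computed gradient rather than the computed gradient alone. A condensed sketch of that pattern, with a simple operator standing in for `batch_dot`:

```python
import mxnet as mx

x = mx.nd.ones((2, 3))
x.attach_grad(grad_req='add')  # gradients accumulate instead of overwriting
x.grad[:] = 5.0                # seed, like a_init_grad_npy in the test
with mx.autograd.record():
    y = (x * 2).sum()
y.backward()
print(x.grad)                  # 7 everywhere: seed (5) + d(sum(2x))/dx (2)
```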