diff --git a/ci/docker/install/ubuntu_julia.sh b/ci/docker/install/ubuntu_julia.sh index 62013e36d8fd..13093acc42f5 100755 --- a/ci/docker/install/ubuntu_julia.sh +++ b/ci/docker/install/ubuntu_julia.sh @@ -22,16 +22,22 @@ set -ex -export JLBINARY='julia.tar.gz' -export JULIADIR='/work/julia' -export JULIA="${JULIADIR}/bin/julia" +function install_julia() { + local suffix=`echo $1 | sed 's/\.//'` # 0.7 -> 07; 1.0 -> 10 + local JLBINARY="julia-$1.tar.gz" + local JULIADIR="/work/julia$suffix" + local JULIA="${JULIADIR}/bin/julia" -mkdir -p $JULIADIR -# The julia version in Ubuntu repo is too old -# We download the tarball from the official link: -# https://julialang.org/downloads/ -wget -O $JLBINARY https://julialang-s3.julialang.org/bin/linux/x64/0.6/julia-0.6.2-linux-x86_64.tar.gz -tar xzvf $JLBINARY -C $JULIADIR --strip 1 -rm $JLBINARY + mkdir -p $JULIADIR + # The julia version in Ubuntu repo is too old + # We download the tarball from the official link: + # https://julialang.org/downloads/ + wget -O $JLBINARY https://julialang-s3.julialang.org/bin/linux/x64/$1/julia-$2-linux-x86_64.tar.gz + tar xzvf $JLBINARY -C $JULIADIR --strip 1 + rm $JLBINARY -$JULIA -e 'versioninfo()' + $JULIA -e 'using InteractiveUtils; versioninfo()' +} + +install_julia 0.7 0.7.0 +install_julia 1.0 1.0.3 diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh index f88e867b0d49..148477a54776 100755 --- a/ci/docker/runtime_functions.sh +++ b/ci/docker/runtime_functions.sh @@ -893,35 +893,45 @@ unittest_ubuntu_gpu_R() { make rpkgtest R_LIBS=/tmp/r-site-library R_GPU_ENABLE=1 } -unittest_ubuntu_cpu_julia06() { +unittest_ubuntu_cpu_julia() { set -ex - export PATH="/work/julia/bin:$PATH" + export PATH="$1/bin:$PATH" export MXNET_HOME='/work/mxnet' - export JULIA_PKGDIR='/work/julia-pkg' - export DEPDIR=`julia -e 'print(Pkg.dir())'` + export JULIA_DEPOT_PATH='/work/julia-depot' + export DEVDIR="$JULIA_DEPOT_PATH/dev" - julia -e 'versioninfo()' - julia -e 'Pkg.init()' + julia -e 'using InteractiveUtils; versioninfo()' # install package - ln -sf ${MXNET_HOME}/julia ${DEPDIR}/MXNet + mkdir -p $DEVDIR + ln -sf ${MXNET_HOME}/julia ${DEVDIR}/MXNet - # install dependencies - julia -e 'Pkg.resolve()' + # register MXNet.jl and dependencies + julia -e 'using Pkg; Pkg.develop("MXNet")' # FIXME export LD_PRELOAD='/usr/lib/x86_64-linux-gnu/libjemalloc.so' # use the prebuilt binary from $MXNET_HOME/lib - julia -e 'Pkg.build("MXNet")' + julia -e 'using Pkg; Pkg.build("MXNet")' # run the script `julia/test/runtests.jl` - julia -e 'Pkg.test("MXNet")' + julia -e 'using Pkg; Pkg.test("MXNet")' # See https://github.com/dmlc/MXNet.jl/pull/303#issuecomment-341171774 julia -e 'using MXNet; mx._sig_checker()' } +unittest_ubuntu_cpu_julia07() { + set -ex + unittest_ubuntu_cpu_julia /work/julia07 +} + +unittest_ubuntu_cpu_julia10() { + set -ex + unittest_ubuntu_cpu_julia /work/julia10 +} + unittest_centos7_cpu() { set -ex cd /work/mxnet @@ -1224,24 +1234,18 @@ deploy_docs() { deploy_jl_docs() { set -ex - export PATH="/work/julia/bin:$PATH" + export PATH="/work/julia10/bin:$PATH" export MXNET_HOME='/work/mxnet' - export JULIA_PKGDIR='/work/julia-pkg' - export DEPDIR=`julia -e 'print(Pkg.dir())'` + export JULIA_DEPOT_PATH='/work/julia-depot' + export DEVDIR="$JULIA_DEPOT_PATH/dev" - julia -e 'versioninfo()' - julia -e 'Pkg.init()' - ln -sf ${MXNET_HOME}/julia ${DEPDIR}/MXNet - julia -e 'Pkg.resolve()' + julia -e 'using InteractiveUtils; versioninfo()' + mkdir -p $DEVDIR # FIXME export 
LD_PRELOAD='/usr/lib/x86_64-linux-gnu/libjemalloc.so' - # use the prebuilt binary from $MXNET_HOME/lib - julia -e 'Pkg.build("MXNet")' - # build docs - julia -e 'Pkg.add("Documenter")' - julia -e 'cd(Pkg.dir("MXNet")); include(joinpath("docs", "make.jl"))' + make -C julia/docs # TODO: make Jenkins worker push to MXNet.jl ph-pages branch if master build # ... diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy index 74bde1eee211..9a03f01bc83a 100644 --- a/ci/jenkins/Jenkins_steps.groovy +++ b/ci/jenkins/Jenkins_steps.groovy @@ -918,13 +918,26 @@ def test_unix_r_gpu() { }] } -def test_unix_julia_cpu() { - return ['Julia 0.6: CPU': { +def test_unix_julia07_cpu() { + return ['Julia 0.7: CPU': { node(NODE_LINUX_CPU) { - ws('workspace/ut-julia06-cpu') { + ws('workspace/ut-julia07-cpu') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init('cpu', mx_lib) - utils.docker_run('ubuntu_cpu', 'unittest_ubuntu_cpu_julia06', false) + utils.docker_run('ubuntu_cpu', 'unittest_ubuntu_cpu_julia07', false) + } + } + } + }] +} + +def test_unix_julia10_cpu() { + return ['Julia 1.0: CPU': { + node(NODE_LINUX_CPU) { + ws('workspace/ut-julia10-cpu') { + timeout(time: max_time, unit: 'MINUTES') { + utils.unpack_and_init('cpu', mx_lib) + utils.docker_run('ubuntu_cpu', 'unittest_ubuntu_cpu_julia10', false) } } } diff --git a/ci/jenkins/Jenkinsfile_unix_cpu b/ci/jenkins/Jenkinsfile_unix_cpu index e581bcf65dc5..6f4cc5023bde 100644 --- a/ci/jenkins/Jenkinsfile_unix_cpu +++ b/ci/jenkins/Jenkinsfile_unix_cpu @@ -39,7 +39,7 @@ core_logic: { custom_steps.compile_unix_mkl_cpu(), custom_steps.compile_unix_mkldnn_cpu(), custom_steps.compile_unix_mkldnn_mkl_cpu() - ]) + ]) utils.parallel_stage('Tests', [ custom_steps.test_unix_python2_cpu(), @@ -52,7 +52,8 @@ core_logic: { custom_steps.test_unix_scala_cpu(), custom_steps.test_unix_clojure_cpu(), custom_steps.test_unix_r_cpu(), - custom_steps.test_unix_julia_cpu(), + custom_steps.test_unix_julia07_cpu(), + custom_steps.test_unix_julia10_cpu(), custom_steps.test_unix_onnx_cpu(), /* Disabled due to master build failure: diff --git a/julia/.gitignore b/julia/.gitignore index d6791c8491bf..3687ed485c5a 100644 --- a/julia/.gitignore +++ b/julia/.gitignore @@ -7,7 +7,4 @@ data deps/src deps/usr deps/deps.jl -docs/_build -docs/build/ -docs/site/ .vscode diff --git a/julia/README.md b/julia/README.md index 2ff7553063f3..91a3981464be 100644 --- a/julia/README.md +++ b/julia/README.md @@ -50,7 +50,7 @@ labels = reduce( labels .= labels .+ 1 # Now we use compute the accuracy -pred = map(i -> indmax(probs[1:10, i]), 1:size(probs, 2)) +pred = map(i -> argmax(probs[1:10, i]), 1:size(probs, 2)) correct = sum(pred .== labels) accuracy = 100correct/length(labels) @printf "Accuracy on eval set: %.2f%%\n" accuracy diff --git a/julia/REQUIRE b/julia/REQUIRE index 5a76dc543b25..b53f0c3cc0ec 100644 --- a/julia/REQUIRE +++ b/julia/REQUIRE @@ -3,5 +3,4 @@ Formatting BinDeps JSON MacroTools -TakingBroadcastSeriously Reexport diff --git a/julia/deps/build.jl b/julia/deps/build.jl index bdc33be8c79b..7a37803f306a 100644 --- a/julia/deps/build.jl +++ b/julia/deps/build.jl @@ -15,7 +15,9 @@ # specific language governing permissions and limitations # under the License. 
-import JSON +using JSON +using Libdl +using LinearAlgebra ################################################################################ # First try to detect and load existing libmxnet @@ -26,17 +28,17 @@ curr_win = "20180211" # v1.1.0 if haskey(ENV, "MXNET_HOME") MXNET_HOME = ENV["MXNET_HOME"] - info("MXNET_HOME environment detected: $MXNET_HOME") - info("Trying to load existing libmxnet...") + @info("MXNET_HOME environment detected: $MXNET_HOME") + @info("Trying to load existing libmxnet...") # In case of macOS, if user build libmxnet from source and set the MXNET_HOME, # the output is still named as `libmxnet.so`. lib = Libdl.find_library(["libmxnet.$(Libdl.dlext)", "libmxnet.so"], [joinpath(MXNET_HOME, "lib"), MXNET_HOME]) if !isempty(lib) - info("Existing libmxnet detected at $lib, skip building...") + @info("Existing libmxnet detected at $lib, skip building...") libmxnet_detected = true else - info("Failed to load existing libmxnet, trying to build from source...") + @info("Failed to load existing libmxnet, trying to build from source...") end end @@ -44,33 +46,35 @@ end CUDAPATHS = String[] if haskey(ENV, "CUDA_HOME") push!(CUDAPATHS, joinpath(ENV["CUDA_HOME"], "lib64")) -elseif is_linux() +elseif Sys.islinux() append!(CUDAPATHS, ["/opt/cuda/lib64", "/usr/local/cuda/lib64"]) end -if is_unix() +if Sys.isunix() try - push!(CUDAPATHS, replace(strip(readstring(`which nvcc`)), "bin/nvcc", "lib64")) + push!(CUDAPATHS, replace(strip(read(`which nvcc`, String)), "bin/nvcc", "lib64")) + catch end end HAS_CUDA = false HAS_CUDNN = false let cudalib = Libdl.find_library(["libcuda", "nvcuda.dll"], CUDAPATHS) - HAS_CUDA = !isempty(cudalib) && Libdl.dlopen_e(cudalib) != C_NULL + global HAS_CUDA = !isempty(cudalib) && Libdl.dlopen_e(cudalib) != C_NULL end -if !HAS_CUDA && is_windows() +if !HAS_CUDA && Sys.iswindows() # TODO: this needs to be improved. try run(`nvcc --version`) - HAS_CUDA = true + global HAS_CUDA = true + catch end end if HAS_CUDA # then check cudnn let cudnnlib = Libdl.find_library("libcudnn", CUDAPATHS) - HAS_CUDNN = !isempty(cudnnlib) && Libdl.dlopen_e(cudnnlib) != C_NULL + global HAS_CUDNN = !isempty(cudnnlib) && Libdl.dlopen_e(cudnnlib) != C_NULL if HAS_CUDNN && !haskey(ENV, "CUDA_HOME") # inference `CUDA_HOME` ENV["CUDA_HOME"] = dirname(dirname(cudnnlib)) end @@ -78,13 +82,13 @@ if HAS_CUDA # then check cudnn end if HAS_CUDA - info("Found a CUDA installation.") + @info("Found a CUDA installation.") if HAS_CUDNN - info("Found a CuDNN installation.") + @info("Found a CuDNN installation.") end - info("CUDA_HOME -> $(get(ENV, "CUDA_HOME", nothing))") + @info("CUDA_HOME -> $(get(ENV, "CUDA_HOME", nothing))") else - info("Did not find a CUDA installation, using CPU-only version of MXNet.") + @info("Did not find a CUDA installation, using CPU-only version of MXNet.") end # propagate more build flags from ENV @@ -98,26 +102,26 @@ function get_cpucore() if haskey(ENV, "TRAVIS") # on travis-ci 2 else - min(Sys.CPU_CORES, 32) + min(Sys.CPU_THREADS, 32) end end using BinDeps @BinDeps.setup if !libmxnet_detected - if is_windows() + if Sys.iswindows() if Sys.ARCH != :x86_64 - info("Prebuilt windows binaries are only available on 64bit. You will have to built MXNet yourself.") + @info("Prebuilt windows binaries are only available on 64bit. 
You will have to build MXNet yourself.")
+            return
         end
-        info("Downloading pre-built packages for Windows.")
+        @info("Downloading pre-built packages for Windows.")
        base_url = "https://github.com/yajiedesign/mxnet/releases/download/weekly_binary_build_v2/prebuildbase_win10_x64_vc14_v2.7z"

        if libmxnet_curr_ver == "master"
            # download_cmd uses powershell 2, but we need powershell 3 to do this
            run(`powershell -NoProfile -Command Invoke-WebRequest -Uri "https://api.github.com/repos/yajiedesign/mxnet/releases/latest" -OutFile "mxnet.json"`)
            curr_win = JSON.parsefile("mxnet.json")["tag_name"]
-            info("Can't use MXNet master on Windows, using latest binaries from $curr_win.")
+            @info("Can't use MXNet master on Windows, using latest binaries from $curr_win.")
        end
        # TODO: Get url from JSON.
        name = "mxnet_x64_vc14_$(HAS_CUDA ? "gpu" : "cpu").7z"
@@ -144,8 +148,7 @@ if !libmxnet_detected
   ################################################################################

   blas_path = Libdl.dlpath(Libdl.dlopen(Base.libblas_name))
-
-  blas_vendor = Base.BLAS.vendor()
+  blas_vendor = LinearAlgebra.BLAS.vendor()

   ilp64 = ""
   if blas_vendor == :openblas64
@@ -154,18 +157,18 @@ if !libmxnet_detected
   FORCE_LAPACK = false
   if blas_vendor == :unknown
-    info("Julia is built with an unkown blas library ($blas_path).")
-    info("Attempting build without reusing the blas library")
+    @info("Julia is built with an unknown blas library ($blas_path).")
+    @info("Attempting build without reusing the blas library")
    USE_JULIA_BLAS = false
  elseif !(blas_vendor in (:openblas, :openblas64))
-    info("Unsure if we can build against $blas_vendor.")
-    info("Attempting build anyway.")
+    @info("Unsure if we can build against $blas_vendor.")
+    @info("Attempting build anyway.")
    USE_JULIA_BLAS = true
  else
    USE_JULIA_BLAS = true
    FORCE_LAPACK = true
  end
-  info("USE_JULIA_BLAS -> $USE_JULIA_BLAS")
+  @info("USE_JULIA_BLAS -> $USE_JULIA_BLAS")

  blas_name = blas_vendor == :openblas64 ?
"openblas" : string(blas_vendor) MSHADOW_LDFLAGS = "MSHADOW_LDFLAGS=-lm $blas_path" @@ -188,7 +191,7 @@ if !libmxnet_detected @build_steps begin BinDeps.DirectoryRule(_mxdir, @build_steps begin ChangeDirectory(_srcdir) - `git clone https://github.com/apache/incubator-mxnet` + `git clone https://github.com/apache/incubator-mxnet mxnet` end) @build_steps begin ChangeDirectory(_mxdir) @@ -199,15 +202,12 @@ if !libmxnet_detected `git checkout origin/$libmxnet_curr_ver` end `git submodule update --init --recursive` - `git -C mshadow checkout -- make/mshadow.mk` - - # copying on changed, make travis caching happy - `../../cpcblas.sh` - - `sed -i -s "s/MSHADOW_CFLAGS = \(.*\)/MSHADOW_CFLAGS = \1 $ilp64/" mshadow/make/mshadow.mk` + `git -C 3rdparty/mshadow checkout -- make/mshadow.mk` + `cp -v ../../cblas.h include/cblas.h` + `sed -i -s "s/MSHADOW_CFLAGS = \(.*\)/MSHADOW_CFLAGS = \1 $ilp64/" 3rdparty/mshadow/make/mshadow.mk` # Copy config.mk, always override the file - if is_apple() + if Sys.isapple() `cp make/osx.mk config.mk` else `cp make/config.mk config.mk` @@ -239,7 +239,7 @@ if !libmxnet_detected # Force enable LAPACK build # Julia's OpenBLAS has LAPACK functionality already if FORCE_LAPACK - if is_apple() + if Sys.isapple() MSHADOW_LDFLAGS *= " -framework Accelerate" end `sed -i -s 's/ADD_CFLAGS =\(.*\)/ADD_CFLAGS =\1 -DMXNET_USE_LAPACK/' config.mk` diff --git a/julia/deps/cpcblas.sh b/julia/deps/cpcblas.sh deleted file mode 100755 index 99342897a58c..000000000000 --- a/julia/deps/cpcblas.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/sh - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - - -# be invoked from build.jl - -set -e - -diff ../../cblas.h include/cblas.h || cp -v ../../cblas.h include/cblas.h diff --git a/julia/docs/.gitignore b/julia/docs/.gitignore new file mode 100644 index 000000000000..8a6e014fbbce --- /dev/null +++ b/julia/docs/.gitignore @@ -0,0 +1,6 @@ +Manifest.toml + +_build +build/ +site/ +venv/ diff --git a/julia/docs/Makefile b/julia/docs/Makefile index 57c623889a83..e42b8cdccb93 100644 --- a/julia/docs/Makefile +++ b/julia/docs/Makefile @@ -16,5 +16,14 @@ # under the License. 
all: - julia --color=yes ./make.jl - mkdocs build + julia --color=yes --project=./ -e \ + 'using Pkg; \ + Pkg.develop(PackageSpec(name="MXNet", path = joinpath(pwd(), "..")))' + julia --color=yes --project=./ ./make.jl + pip install --user pygments mkdocs mkdocs-material python-markdown-math + ~/.local/bin/mkdocs build + +clean: + rm -rvf venv + rm -rvf build + rm -rvf site diff --git a/julia/docs/Project.toml b/julia/docs/Project.toml new file mode 100644 index 000000000000..a4b243b0ffea --- /dev/null +++ b/julia/docs/Project.toml @@ -0,0 +1,7 @@ +[deps] +Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +DocumenterMarkdown = "997ab1e6-3595-5248-9280-8efb232c3433" +MXNet = "a7949054-b901-59c6-b8e3-7238c29bf7f0" + +[compat] +Documenter = "~0.21" diff --git a/julia/docs/make.jl b/julia/docs/make.jl index 6e3705a95fdc..3e541c636888 100644 --- a/julia/docs/make.jl +++ b/julia/docs/make.jl @@ -15,15 +15,13 @@ # specific language governing permissions and limitations # under the License. -using Documenter, MXNet +using Documenter +using DocumenterMarkdown +using MXNet makedocs( - modules = MXNet, - doctest = false -) - -deploydocs( - deps = Deps.pip("pygments", "mkdocs", "mkdocs-material", "python-markdown-math"), - repo = "github.com/dmlc/MXNet.jl.git", - julia = "0.6", + sitename = "MXNet.jl", + modules = MXNet, + doctest = false, + format = Markdown(), ) diff --git a/julia/docs/mkdocs.yml b/julia/docs/mkdocs.yml index 24281730885f..22cb71869673 100644 --- a/julia/docs/mkdocs.yml +++ b/julia/docs/mkdocs.yml @@ -41,7 +41,7 @@ markdown_extensions: docs_dir: 'build' -pages: +nav: - Home: index.md - Tutorial: - Digit Recognition on MNIST: tutorial/mnist.md diff --git a/julia/docs/src/tutorial/mnist.md b/julia/docs/src/tutorial/mnist.md index 76430fd1b1d0..916e46deb853 100644 --- a/julia/docs/src/tutorial/mnist.md +++ b/julia/docs/src/tutorial/mnist.md @@ -252,7 +252,7 @@ labels = reduce( labels .= labels .+ 1 # Now we use compute the accuracy -pred = map(i -> indmax(probs[1:10, i]), 1:size(probs, 2)) +pred = map(i -> argmax(probs[1:10, i]), 1:size(probs, 2)) correct = sum(pred .== labels) @printf "Accuracy on eval set: %.2f%%\n" 100correct/length(labels) ``` diff --git a/julia/examples/char-lstm/lstm.jl b/julia/examples/char-lstm/lstm.jl index fc4bcc4b6a91..6f6640e9562b 100644 --- a/julia/examples/char-lstm/lstm.jl +++ b/julia/examples/char-lstm/lstm.jl @@ -145,7 +145,16 @@ function mx.update!(metric::NLL, labels::Vector{<:mx.NDArray}, preds::Vector{<:m nll = 0.0 for (label, pred) in zip(labels, preds) @mx.nd_as_jl ro=(label, pred) begin - nll -= sum(log.(max.(broadcast_getindex(pred, round.(Int,label+1), 1:length(label)), 1e-20))) + nll -= sum( + log.( + max.( + getindex.( + (pred,), + round.(Int,label .+ 1), + 1:length(label)), + 1e-20) + ) + ) end end diff --git a/julia/examples/char-lstm/seq-data.jl b/julia/examples/char-lstm/seq-data.jl index 3489e5bc3c39..0df110322317 100644 --- a/julia/examples/char-lstm/seq-data.jl +++ b/julia/examples/char-lstm/seq-data.jl @@ -16,13 +16,14 @@ # under the License. 
# Simple data provider that load text -using Iterators +using Base.Iterators using MXNet -function build_vocabulary(corpus_fn::AbstractString, vocab_fn::AbstractString; max_vocab=10000) +function build_vocabulary(corpus_fn::AbstractString, vocab_fn::AbstractString; + max_vocab = 10000) if isfile(vocab_fn) - info("Vocabulary already exists, reusing $vocab_fn...") - vocab = Dict{Char,Int}(w => i for (i,w) in enumerate(readstring(vocab_fn))) + @info("Vocabulary already exists, reusing $vocab_fn...") + vocab = Dict{Char,Int}(w => i for (i,w) in enumerate(read(vocab_fn, String))) else # count symbol frequency dict = Dict{Char,Int}() @@ -99,8 +100,8 @@ function mx.eachbatch(p::CharSeqProvider) for idx_batch in partition(idx_all, p.batch_size*p.seq_len) for i = 1:p.seq_len - data_jl[i][:] = 0 - label_jl[i][:] = 0 + data_jl[i][:] .= 0 + label_jl[i][:] .= 0 end for (i, idx_seq) in enumerate(partition(idx_batch, p.seq_len)) diff --git a/julia/examples/char-lstm/train.jl b/julia/examples/char-lstm/train.jl index 57bfeb6b6e11..7dbdefd42e41 100644 --- a/julia/examples/char-lstm/train.jl +++ b/julia/examples/char-lstm/train.jl @@ -31,7 +31,7 @@ lstm = LSTM(LSTM_N_LAYER, SEQ_LENGTH, DIM_HIDDEN, DIM_EMBED, #--data # load data -text_all = readstring(INPUT_FILE) +text_all = read(INPUT_FILE, String) len_train = round(Int, length(text_all)*DATA_TR_RATIO) text_tr = text_all[1:len_train] text_val = text_all[len_train+1:end] diff --git a/julia/examples/char-lstm/visualize.jl b/julia/examples/char-lstm/visualize.jl index e2a2c87c9c10..dd483940095e 100644 --- a/julia/examples/char-lstm/visualize.jl +++ b/julia/examples/char-lstm/visualize.jl @@ -15,8 +15,8 @@ # specific language governing permissions and limitations # under the License. -include(joinpath(dirname(@__FILE__), "config.jl")) -include(joinpath(dirname(@__FILE__), "lstm.jl")) +include(joinpath(@__DIR__, "config.jl")) +include(joinpath(@__DIR__, "lstm.jl")) using MXNet diff --git a/julia/examples/imagenet/ijulia-pretrained-predict/Prediction with Pre-trained Model.ipynb b/julia/examples/imagenet/ijulia-pretrained-predict/Prediction with Pre-trained Model.ipynb index 2d5d0ee7dd97..3ef8040c30f6 100644 --- a/julia/examples/imagenet/ijulia-pretrained-predict/Prediction with Pre-trained Model.ipynb +++ b/julia/examples/imagenet/ijulia-pretrained-predict/Prediction with Pre-trained Model.ipynb @@ -181,7 +181,7 @@ "classes = open(joinpath(model_dir, \"synset.txt\")) do s \n", " map(x -> replace(strip(x), r\"^n[0-9]+ \", \"\"), readlines(s))\n", "end\n", - "println(classes[indmax(pred)])" + "println(classes[argmax(pred)])" ] }, { diff --git a/julia/examples/mnist/mlp-test.jl b/julia/examples/mnist/mlp-test.jl index 1af84ed3ba8a..7a24f9281652 100644 --- a/julia/examples/mnist/mlp-test.jl +++ b/julia/examples/mnist/mlp-test.jl @@ -21,7 +21,7 @@ module MNISTTest using MXNet -using Base.Test +using Test include("mnist-data.jl") @@ -56,7 +56,7 @@ function mnist_fit_and_predict(optimizer, initializer, n_epoch) end mlp_load = mx.load("$cp_prefix-symbol.json", mx.SymbolicNode) @test mx.to_json(mlp_load) == mx.to_json(mlp) - mlp_load = mx.from_json(readstring("$cp_prefix-symbol.json"), mx.SymbolicNode) + mlp_load = mx.from_json(read("$cp_prefix-symbol.json", String), mx.SymbolicNode) @test mx.to_json(mlp_load) == mx.to_json(mlp) #-------------------------------------------------------------------------------- @@ -68,13 +68,13 @@ function mnist_fit_and_predict(optimizer, initializer, n_epoch) for batch in eval_provider push!(labels, copy(mx.get(eval_provider, batch, 
:softmax_label))) end - labels = cat(1, labels...) + labels = cat(labels..., dims = 1) # Now we use compute the accuracy correct = 0 for i = 1:length(labels) # labels are 0...9 - if indmax(probs[:,i]) == labels[i]+1 + if argmax(probs[:,i]) == labels[i]+1 correct += 1 end end @@ -88,32 +88,32 @@ function mnist_fit_and_predict(optimizer, initializer, n_epoch) end function test_mnist_mlp() - info("MNIST::SGD") + @info("MNIST::SGD") @test mnist_fit_and_predict(mx.SGD(η=.2), mx.UniformInitializer(.01), 2) > 90 - info("MNIST::SGD::η scheduler") + @info("MNIST::SGD::η scheduler") @test mnist_fit_and_predict(mx.SGD(η_sched=mx.LearningRate.Inv(.25)), mx.UniformInitializer(.01), 2) > 90 - info("MNIST::SGD::momentum μ") + @info("MNIST::SGD::momentum μ") @test mnist_fit_and_predict(mx.SGD(η=.1, μ=.9), mx.UniformInitializer(.01), 2) > 90 - info("MNIST::ADAM") + @info("MNIST::ADAM") @test mnist_fit_and_predict(mx.ADAM(), mx.NormalInitializer(), 2) > 90 - info("MNIST::AdaGrad") + @info("MNIST::AdaGrad") @test mnist_fit_and_predict(mx.AdaGrad(), mx.NormalInitializer(), 2) > 90 - info("MNIST::AdaDelta") + @info("MNIST::AdaDelta") @test mnist_fit_and_predict(mx.AdaDelta(), mx.NormalInitializer(), 2) > 90 - info("MNIST::AdaMax") + @info("MNIST::AdaMax") @test mnist_fit_and_predict(mx.AdaMax(), mx.NormalInitializer(), 2) > 90 - info("MNIST::RMSProp") + @info("MNIST::RMSProp") @test mnist_fit_and_predict(mx.RMSProp(), mx.NormalInitializer(), 2) > 90 - info("MNIST::Nadam") + @info("MNIST::Nadam") @test mnist_fit_and_predict(mx.Nadam(), mx.NormalInitializer(), 2) > 90 end diff --git a/julia/examples/mnist/mlp.jl b/julia/examples/mnist/mlp.jl index 20facc9b71b3..86111ee68172 100644 --- a/julia/examples/mnist/mlp.jl +++ b/julia/examples/mnist/mlp.jl @@ -70,6 +70,6 @@ labels = reduce( labels .= labels .+ 1 # Now we use compute the accuracy -pred = map(i -> indmax(probs[1:10, i]), 1:size(probs, 2)) +pred = map(i -> argmax(probs[1:10, i]), 1:size(probs, 2)) correct = sum(pred .== labels) @printf "Accuracy on eval set: %.2f%%\n" 100correct/length(labels) diff --git a/julia/src/MXNet.jl b/julia/src/MXNet.jl index 03c3cb89b530..febd80cc8f8c 100644 --- a/julia/src/MXNet.jl +++ b/julia/src/MXNet.jl @@ -15,8 +15,6 @@ # specific language governing permissions and limitations # under the License. -__precompile__() - module MXNet using Reexport @@ -26,16 +24,20 @@ using Reexport export mx module mx -import Base.Iterators: filter +using Base.Broadcast: Broadcasted, DefaultArrayStyle +using Libdl +using LinearAlgebra +using Markdown +using Printf +using Statistics +using Random using Formatting using MacroTools -using TakingBroadcastSeriously: @unfuse -import TakingBroadcastSeriously: broadcast_ # Functions from base that we can safely extend and that are defined by libmxnet. 
-import Base: round, ceil, floor, cos, sin, abs, sign, exp, sqrt, exp, log, norm, - transpose +import Base.Broadcast: broadcasted +import Base.Iterators: filter ############################################################################### # exports @@ -140,9 +142,9 @@ include("base.jl") include("context.jl") include("util.jl") -include("broadcast.jl") include("ndarray.jl") + include("random.jl") include("autograd.jl") @@ -150,6 +152,8 @@ include("name.jl") include("symbolic-node.jl") include("executor.jl") +include("broadcast.jl") + include("metric.jl") include("optimizer.jl") include("initializer.jl") diff --git a/julia/src/autograd.jl b/julia/src/autograd.jl index 72fb82ba1bbb..8b5edae5770a 100644 --- a/julia/src/autograd.jl +++ b/julia/src/autograd.jl @@ -43,7 +43,7 @@ function _set_recording(state::Bool)::Bool prev[] end -_set_recording(::Void) = nothing +_set_recording(::Cvoid) = nothing """ Set status to training/predicting. @@ -63,7 +63,7 @@ function _set_training(train_mode::Bool)::Bool prev[] end -_set_training(::Void) = nothing +_set_training(::Cvoid) = nothing ############################################################################### # Public API @@ -91,7 +91,7 @@ function is_training()::Bool state[] end -@inline function _record(f, is_record::Union{Void,Bool}, train_mode::Union{Void,Bool}) +@inline function _record(f, is_record::Union{Cvoid,Bool}, train_mode::Union{Cvoid,Bool}) # Port from Python's `_RecordingStateScope` context manager # __enter__ prev_is_record = _set_recording(is_record) @@ -211,12 +211,12 @@ Compute the gradients of heads w.r.t previously marked variables. - `head::NDArray`: output NDArray -- `head_grad::NDArray` or `Void`: gradient coefficient with respect to head. +- `head_grad::NDArray` or `Cvoid`: gradient coefficient with respect to head. - `heads::Vector{NDArray}`: a list of output NDArray - `head_grads::Vector`: a list of gradient coefficient with respect ot heads. - the element should be `NDArray` or `Void` + the element should be `NDArray` or `Cvoid` - `retain_graph::Bool`: whether to keep the graph after backward. e.g: If you want to differentiate the same graph twice, @@ -227,10 +227,10 @@ Compute the gradients of heads w.r.t previously marked variables. backward!(head::NDArray, head_grad::NDArray; kws...) = backward!([head], [head_grad]; kws...) -backward!(head::NDArray, head_grad::Void = nothing; kws...) = +backward!(head::NDArray, head_grad::Cvoid = nothing; kws...) = backward!([head], head_grad; kws...) 
-function backward!(heads::VecOfNDArray, head_grad::Void; +function backward!(heads::VecOfNDArray, head_grad::Cvoid; retain_graph::Bool = false, train_mode::Bool = true) @mxcall( :MXAutogradBackwardEx, @@ -262,10 +262,10 @@ function backward!(heads::VecOfNDArray, head_grads::Vector; ograd_handles = map(head_grads) do x if x isa NDArray x.handle - elseif x isa Void + elseif x ≡ nothing # faster than `x isa Cvoid` in Julia 0.7 MX_handle(C_NULL) else - throw(ArgumentError("element of head_grads should be NDArray or Void")) + throw(ArgumentError("element of head_grads should be NDArray or Cvoid")) end end @assert length(output_handles) == length(ograd_handles) diff --git a/julia/src/base.jl b/julia/src/base.jl index ce1c183eafb5..61779d194a94 100644 --- a/julia/src/base.jl +++ b/julia/src/base.jl @@ -27,7 +27,7 @@ Base.show(io::IO, e::MXError) = print(io, e.msg) ################################################################################ const MX_uint = Cuint const MX_float = Cfloat -const MX_handle = Ptr{Void} +const MX_handle = Ptr{Cvoid} const char_p = Ptr{UInt8} const char_pp = Ptr{char_p} @@ -50,7 +50,10 @@ const grad_req_map = Dict{Symbol,GRAD_REQ}( const MXNET_LIB = Libdl.find_library(["libmxnet.$(Libdl.dlext)", "libmxnet.so"], # see build.jl [joinpath(get(ENV, "MXNET_HOME", ""), "lib"), get(ENV, "MXNET_HOME", ""), - Pkg.dir("MXNet", "deps", "usr", "lib")]) + joinpath(@__DIR__, "..", + "deps", "usr", "lib")]) +const LIB_VERSION = Ref{Cint}(0) + if isempty(MXNET_LIB) # touch this file, so that after the user properly build libmxnet, the precompiled # MXNet.ji will be re-compiled to get MXNET_LIB properly. @@ -65,8 +68,7 @@ function __init__() # TODO: bug in nnvm, if do not call this, call get handle "_copyto" will fail _get_libmx_op_names() _populate_iter_creator_cache!() - - global const LIB_VERSION = _get_lib_version() + _get_lib_version!() atexit() do # notify libmxnet we are shutting down @@ -87,8 +89,8 @@ macro mxcall(fv, argtypes, args...) f = eval(fv) args = map(esc, args) quote - _mxret = ccall( ($(Meta.quot(f)), $MXNET_LIB), - Cint, $argtypes, $(args...) ) + _mxret = ccall(($(QuoteNode(f)), $MXNET_LIB), + Cint, $argtypes, $(args...)) if _mxret != 0 err_msg = mx_get_last_error() throw(MXError(err_msg)) @@ -98,38 +100,38 @@ end """ Get libmxnet version + +This function will changes the global variable `LIB_VERSION`. 
""" -function _get_lib_version() - ver = Ref{Cint}(0) - @mxcall :MXGetVersion (Ref{Cint},) ver - ver[] +function _get_lib_version!() + @mxcall :MXGetVersion (Ref{Cint},) LIB_VERSION + LIB_VERSION[] end ################################################################################ # Handle types ################################################################################ -macro mx_define_handle_t(name, destructor) - name = esc(name) - quote +function mx_define_handle_t(name, destructor) + @eval begin mutable struct $name - value :: MX_handle + value::MX_handle function $name(value = C_NULL) hdr = new(value) - $(if destructor != :nop - :(finalizer(hdr, delete!)) + $(if destructor != nothing + :(finalizer(delete!, hdr)) end) return hdr end end - $(if finalizer != :nop + $(if finalizer != nothing quote function delete!(h :: $name) if h.value != C_NULL - @mxcall($(Meta.quot(destructor)), (MX_handle,), h.value) + @mxcall($(QuoteNode(destructor)), (MX_handle,), h.value) h.value = C_NULL end end @@ -142,16 +144,16 @@ macro mx_define_handle_t(name, destructor) Base.convert(t::Type{MX_handle}, obj::$name) = Base.unsafe_convert(t, obj) Base.cconvert(t::Type{MX_handle}, obj::$name) = Base.unsafe_convert(t, obj) - function Base.isnull(obj::$name) obj.value == C_NULL end + MX_handle(x::$name) = Base.convert(MX_handle, x) end end -@mx_define_handle_t(MX_NDArrayHandle, MXNDArrayFree) -@mx_define_handle_t(MX_OpHandle, nop) -@mx_define_handle_t(MX_SymbolHandle, MXSymbolFree) -@mx_define_handle_t(MX_ExecutorHandle, MXExecutorFree) -@mx_define_handle_t(MX_DataIterHandle, MXDataIterFree) -@mx_define_handle_t(MX_KVStoreHandle, MXKVStoreFree) +mx_define_handle_t(:MX_NDArrayHandle, :MXNDArrayFree) +mx_define_handle_t(:MX_OpHandle, nothing) +mx_define_handle_t(:MX_SymbolHandle, :MXSymbolFree) +mx_define_handle_t(:MX_ExecutorHandle, :MXExecutorFree) +mx_define_handle_t(:MX_DataIterHandle, :MXDataIterFree) +mx_define_handle_t(:MX_KVStoreHandle, :MXKVStoreFree) ################################################################################ # MXNet Params @@ -183,8 +185,8 @@ dump_mx_param(val::Float64) = @sprintf("%.16e", val) dump_mx_param(val::Float32) = @sprintf("%.8e", val) dump_mx_param(val::Float16) = @sprintf("%.4e", val) dump_mx_param(val::Irrational) = @sprintf("%.16e", val) -dump_mx_param(shape::NTuple{N, <:Integer}) where N = - string(tuple(flipdim([shape...], 1)...)) +dump_mx_param(shape::NTuple{N,<:Integer}) where N = + string(reverse(shape)) """ @@ -248,11 +250,11 @@ function _defstruct_impl(is_immutable, name, fields) name = esc(name.args[1]) end - field_defs = Vector{Expr}(length(fields)) # :(field2 :: Int) - field_names = Vector{Expr}(length(fields)) # :field2 - field_defaults = Vector{Expr}(length(fields)) # :(field2 = 0) - field_types = Vector{Expr}(length(fields)) # Int - field_asserts = Vector{Expr}(length(fields)) # :(field2 >= 0) + field_defs = Vector{Expr}(undef, length(fields)) # :(field2 :: Int) + field_names = Vector{Expr}(undef, length(fields)) # :field2 + field_defaults = Vector{Expr}(undef, length(fields)) # :(field2 = 0) + field_types = Vector{Expr}(undef, length(fields)) # Int + field_asserts = Vector{Expr}(undef, length(fields)) # :(field2 >= 0) required_field = Symbol[] for i = 1:length(fields) diff --git a/julia/src/broadcast.jl b/julia/src/broadcast.jl index fee960a46271..7c68fab5007c 100644 --- a/julia/src/broadcast.jl +++ b/julia/src/broadcast.jl @@ -15,22 +15,17 @@ # specific language governing permissions and limitations # under the License. 
-using TakingBroadcastSeriously: Broadcasted, unwrap +struct NDArrayStyle{N} <: Broadcast.AbstractArrayStyle{N} end +NDArrayStyle(::Val{N}) where N = NDArrayStyle{N}() +NDArrayStyle{M}(::Val{N}) where {N,M} = NDArrayStyle{N}() -for f in :[%, - tan, asin, acos, atan, - sinh, cosh, tanh, asinh, acosh, atanh, - min, max, - hypot].args - # copy from TakingBroadcastSeriously - @eval Base.$f(a::Broadcasted...) = Broadcasted(broadcast_($f, unwrap.(a)...)) - @eval Base.$f(a::Broadcasted, b) = Broadcasted(broadcast_($f, unwrap(a), b)) - @eval Base.$f(b, a::Broadcasted) = Broadcasted(broadcast_($f, b, unwrap(a))) -end +# Determin the output type +Base.BroadcastStyle(::Type{<:NDArray{T,N}}) where {T,N} = NDArrayStyle{N}() -for f in :[σ, sigmoid, relu, softmax, log_softmax].args - # copy from TakingBroadcastSeriously - @eval $f(a::Broadcasted...) = Broadcasted(broadcast_($f, unwrap.(a)...)) - @eval $f(a::Broadcasted, b) = Broadcasted(broadcast_($f, unwrap(a), b)) - @eval $f(b, a::Broadcasted) = Broadcasted(broadcast_($f, b, unwrap(a))) -end +Base.broadcastable(x::NDArray) = x + +# Make it non-lazy +broadcasted(f, x::NDArray, args...) = f(x, args...) +broadcasted(f, y, x::NDArray, args...) = f(y, x, args...) +broadcasted(f, x::NDArray{T,N}, y::NDArray{T,N}, args...) where {T,N} = + f(x, y, args...) diff --git a/julia/src/callback.jl b/julia/src/callback.jl index 06e431de06d0..39d8f2552035 100644 --- a/julia/src/callback.jl +++ b/julia/src/callback.jl @@ -96,7 +96,7 @@ function speedometer(;frequency::Int = 50) cl_tic = time() else speed = frequency * state.batch_size / (time() - cl_tic) - info(format("Speed: {1:>6.2f} samples/sec", speed)) + @info(format("Speed: {1:>6.2f} samples/sec", speed)) cl_tic = time() end end diff --git a/julia/src/context.jl b/julia/src/context.jl index c97522b3b846..71aee3020daa 100644 --- a/julia/src/context.jl +++ b/julia/src/context.jl @@ -17,17 +17,22 @@ @enum CONTEXT_TYPE CPU=1 GPU=2 CPU_PINNED=3 +Base.convert(::Type{CONTEXT_TYPE}, x::Integer) = CONTEXT_TYPE(x) + """ Context(dev_type, dev_id) A context describes the device type and id on which computation should be carried on. """ struct Context - device_type :: CONTEXT_TYPE - device_id :: Int + device_type::CONTEXT_TYPE + device_id::Int + + Context(dev_type::CONTEXT_TYPE, dev_id::Integer = 0) = new(dev_type, dev_id) end -Context(dev_type :: Union{CONTEXT_TYPE, Int}, dev_id :: Int = 0) = - Context(convert(CONTEXT_TYPE, dev_type), dev_id) + +Context(dev_type::Integer, dev_id::Integer = 0) = + Context(convert(CONTEXT_TYPE, dev_type), dev_id) Base.show(io::IO, ctx::Context) = print(io, "$(ctx.device_type)$(ctx.device_id)") @@ -39,9 +44,9 @@ Get a CPU context with a specific id. `cpu()` is usually the default context for operations when no context is specified. # Arguments -* `dev_id::Int = 0`: the CPU id. +* `dev_id::Integer = 0`: the CPU id. """ -cpu(dev_id::Int = 0) = Context(CPU, dev_id) +cpu(dev_id::Integer = 0) = Context(CPU, dev_id) """ gpu(dev_id) @@ -49,6 +54,6 @@ cpu(dev_id::Int = 0) = Context(CPU, dev_id) Get a GPU context with a specific id. The K GPUs on a node is typically numbered as 0,...,K-1. # Arguments -* `dev_id :: Int = 0` the GPU device id. +* `dev_id::Integer = 0` the GPU device id. 
""" -gpu(dev_id::Int = 0) = return Context(GPU, dev_id) +gpu(dev_id::Integer = 0) = Context(GPU, dev_id) diff --git a/julia/src/deprecated.jl b/julia/src/deprecated.jl index 12c5345aa198..32819810eb8d 100644 --- a/julia/src/deprecated.jl +++ b/julia/src/deprecated.jl @@ -44,26 +44,26 @@ # @deprecate make `randn` exported accidentially # so we make the depwarn manually function randn(μ, σ, dims::NTuple{N,Int}, ctx::Context = cpu()) where N - warn("mx.randn(μ, σ, dims, ctx = cpu()) is deprecated, use " * - "mx.randn(dims...; μ = μ, σ = σ, context = ctx) instead.") + @warn("mx.randn(μ, σ, dims, ctx = cpu()) is deprecated, use " * + "mx.randn(dims...; μ = μ, σ = σ, context = ctx) instead.") mx.randn(dims...; μ = μ, σ = σ, context = ctx) end function randn!(μ, σ, x::NDArray) - warn("mx.randn!(μ, σ, x::NDArray) is deprecated, use " * - "mx.randn!(x; μ = μ, σ = σ) instead.") + @warn("mx.randn!(μ, σ, x::NDArray) is deprecated, use " * + "mx.randn!(x; μ = μ, σ = σ) instead.") randn!(x; μ = μ, σ = σ) end function rand!(low::Real, high::Real, x::NDArray) - warn("rand!(low, high, x::NDArray) is deprecated, use " * - "rand!(x, low = low, high = high) instead.") + @warn("rand!(low, high, x::NDArray) is deprecated, use " * + "rand!(x, low = low, high = high) instead.") rand!(x, low = low, high = high) end function rand(low::Real, high::Real, dims::NTuple{N,Int}, context::Context = cpu()) where N - warn("rand!(low, high, dims, x::NDArray, context = cpu()) is deprecated, use " * - "rand!(dims..., x; low = low, high = high, context = cpu()) instead.") + @warn("rand!(low, high, dims, x::NDArray, context = cpu()) is deprecated, use " * + "rand!(dims..., x; low = low, high = high, context = cpu()) instead.") rand(dims...; low = low, high = high, context = context) end @@ -75,86 +75,97 @@ end @deprecate clip(x; a_min = 0, a_max = 0) clip(x, a_min, a_max) function broadcast_plus(x::NDArray, y::NDArray) - warn("broadcast_plus(x, y) is deprecated, use x .+ y instead.") + @warn("broadcast_plus(x, y) is deprecated, use x .+ y instead.") x .+ y end function broadcast_add(x::NDArray, y::NDArray) - warn("broadcast_add(x, y) is deprecated, use x .+ y instead.") + @warn("broadcast_add(x, y) is deprecated, use x .+ y instead.") x .+ y end function broadcast_sub(x::NDArray, y::NDArray) - warn("broadcast_sub(x, y) is deprecated, use x .- y instead.") + @warn("broadcast_sub(x, y) is deprecated, use x .- y instead.") x .- y end function broadcast_minus(x::NDArray, y::NDArray) - warn("broadcast_minus(x, y) is deprecated, use x .- y instead.") + @warn("broadcast_minus(x, y) is deprecated, use x .- y instead.") x .- y end function broadcast_mul(x::NDArray, y::NDArray) - warn("broadcast_mul(x, y) is deprecated, use x .* y instead.") + @warn("broadcast_mul(x, y) is deprecated, use x .* y instead.") x .* y end function broadcast_div(x::NDArray, y::NDArray) - warn("broadcast_div(x, y) is deprecated, use x ./ y instead.") + @warn("broadcast_div(x, y) is deprecated, use x ./ y instead.") x ./ y end function broadcast_mod(x::NDArray, y::NDArray) - warn("broadcast_mod(x, y) is deprecated, use x .% y instead.") + @warn("broadcast_mod(x, y) is deprecated, use x .% y instead.") x .% y end function broadcast_power(x::NDArray, y::NDArray) - warn("broadcast_power(x, y) is deprecated, use x.^y instead.") + @warn("broadcast_power(x, y) is deprecated, use x.^y instead.") x.^y end function broadcast_equal(x::NDArray, y::NDArray) - warn("broadcast_equal(x, y) is deprecated, use x .== y instead.") + @warn("broadcast_equal(x, y) is deprecated, use x 
.== y instead.")
   x .== y
 end

 function broadcast_not_equal(x::NDArray, y::NDArray)
-  warn("broadcast_not_equal(x, y) is deprecated, use x .== y instead.")
+  @warn("broadcast_not_equal(x, y) is deprecated, use x .!= y instead.")
   x .!= y
 end

 function broadcast_greater(x::NDArray, y::NDArray)
-  warn("broadcast_greater(x, y) is deprecated, use x .== y instead.")
+  @warn("broadcast_greater(x, y) is deprecated, use x .> y instead.")
   x .> y
 end

 function broadcast_greater_equal(x::NDArray, y::NDArray)
-  warn("broadcast_greater_equal(x, y) is deprecated, use x .== y instead.")
+  @warn("broadcast_greater_equal(x, y) is deprecated, use x .>= y instead.")
   x .>= y
 end

 function broadcast_lesser(x::NDArray, y::NDArray)
-  warn("broadcast_lesser(x, y) is deprecated, use x .== y instead.")
+  @warn("broadcast_lesser(x, y) is deprecated, use x .< y instead.")
   x .< y
 end

 function broadcast_lesser_equal(x::NDArray, y::NDArray)
-  warn("broadcast_lesser_equal(x, y) is deprecated, use x .== y instead.")
+  @warn("broadcast_lesser_equal(x, y) is deprecated, use x .<= y instead.")
   x .<= y
 end

 function broadcast_maximum(x::NDArray, y::NDArray)
-  warn("broadcast_maximum(x, y) is deprecated, use max.(x, y) instead.")
+  @warn("broadcast_maximum(x, y) is deprecated, use max.(x, y) instead.")
   max.(x, y)
 end

 function broadcast_minimum(x::NDArray, y::NDArray)
-  warn("broadcast_minimum(x, y) is deprecated, use min.(x, y) instead.")
+  @warn("broadcast_minimum(x, y) is deprecated, use min.(x, y) instead.")
   min.(x, y)
 end

 function broadcast_hypot(x::NDArray, y::NDArray)
-  warn("broadcast_hypot(x, y) is deprecated, use hypot.(x, y) instead.")
+  @warn("broadcast_hypot(x, y) is deprecated, use hypot.(x, y) instead.")
   hypot.(x, y)
 end
+
+# Introduced by https://github.com/apache/incubator-mxnet/pull/12845
+import Base: sum, maximum, minimum, prod, cat
+@deprecate sum(x::NDArray, dims) sum(x, dims = dims)
+@deprecate maximum(x::NDArray, dims) maximum(x, dims = dims)
+@deprecate minimum(x::NDArray, dims) minimum(x, dims = dims)
+@deprecate prod(x::NDArray, dims) prod(x, dims = dims)
+@deprecate cat(dims, As::NDArray{T}...) where T cat(As..., dims = dims)
+
+import Statistics: mean
+@deprecate mean(x::NDArray, dims) mean(x, dims = dims)
diff --git a/julia/src/executor.jl b/julia/src/executor.jl
index 4bf4339d65d1..29c21c8f481f 100644
--- a/julia/src/executor.jl
+++ b/julia/src/executor.jl
@@ -28,7 +28,7 @@ mutable struct Executor
   handle :: MX_ExecutorHandle
   symbol :: SymbolicNode
   arg_arrays :: VecOfNDArray
-  grad_arrays :: Vector{Union{Void,<:NDArray}}
+  grad_arrays :: Vector{Union{Cvoid,<:NDArray}}
   aux_arrays :: VecOfNDArray
   outputs :: VecOfNDArray
   arg_dict :: Dict{Symbol}
@@ -73,17 +73,17 @@ function _get_ndarray_inputs(arg_key::AbstractString, args::Dict{Symbol},
   args_vec = map(arg_names) do name
     arr = get(args, name, nothing)
     if !allow_missing
-      @assert(!isa(arr, Void), "Must specify all arguments in $arg_key ($name is missing)")
+      @assert(!isa(arr, Cvoid), "Must specify all arguments in $arg_key ($name is missing)")
     end
     arr
   end
   # help the type inference
   if allow_missing
-    args_vec = Union{NDArray,Void}[args_vec...]
+    args_vec = Union{NDArray,Cvoid}[args_vec...]
   else
     args_vec = NDArray[args_vec...]
   end
-  args_hdr = MX_handle[(isa(x,Void) ? MX_handle(0) : x) for x in args_vec]
+  args_hdr = MX_handle[(isa(x,Cvoid) ?
MX_handle(0) : x) for x in args_vec] return (args_hdr, args_vec) end @@ -115,12 +115,12 @@ function bind(self::SymbolicNode, ctx::Context, args; aux_args_hdr, aux_states = _get_ndarray_inputs("aux_states", aux_states, list_auxiliary_states(self), false) if isa(grad_req, GRAD_REQ) - reqs = MX_uint[grad_req for i=1:length(args)] + reqs = MX_uint[MX_uint(grad_req) for i=1:length(args)] elseif isa(grad_req, Vector{GRAD_REQ}) @assert(length(grad_req) == length(args)) - reqs = MX_uint[grad_req...] + reqs = MX_uint[MX_uint.(grad_req)...] elseif isa(grad_req, Dict{Symbol, GRAD_REQ}) - reqs = MX_uint[get(grad_req, name, GRAD_NOP) for name in arg_names] + reqs = MX_uint[MX_uint(get(grad_req, name, GRAD_NOP)) for name in arg_names] end ref_hdr = Ref{MX_handle}(0) @@ -129,7 +129,7 @@ function bind(self::SymbolicNode, ctx::Context, args; MX_uint, Ptr{MX_handle}, Ref{MX_handle}), self, ctx.device_type, ctx.device_id, length(args), args_hdr, args_grad_hdr, reqs, length(aux_states), aux_args_hdr, ref_hdr) - args_grad = convert(Vector{Union{Void,NDArray}}, args_grad) + args_grad = convert(Vector{Union{Cvoid,NDArray}}, args_grad) executor = Executor(MX_ExecutorHandle(ref_hdr[]), self, args, args_grad, aux_states) end @@ -145,7 +145,7 @@ function simple_bind(self::SymbolicNode, ctx::Context; grad_req::Union{GRAD_REQ,Dict{Symbol,GRAD_REQ}} = GRAD_WRITE, kwargs...) arg_shapes, out_shapes, aux_shapes = infer_shape(self; kwargs...) - @assert(!isa(arg_shapes, Void), "Information not enough to perform complete shape inference") + @assert(!isa(arg_shapes, Cvoid), "Information not enough to perform complete shape inference") arg_arrays = NDArray[zeros(shape, ctx) for shape in arg_shapes] arg_names = list_arguments(self) @@ -228,7 +228,7 @@ julia> x = mx.Variable(:x) MXNet.mx.SymbolicNode x julia> exec = mx.bind(x + 1, mx.cpu(), Dict(:x => mx.ones(2,3))) -mx.Executor Ptr{Void} @0x000055c3dee9eb30 +mx.Executor Ptr{Nothing} @0x000055c3dee9eb30 julia> print(exec) Symbol Outputs: diff --git a/julia/src/initializer.jl b/julia/src/initializer.jl index 95dbeb31febd..d4b35c0ce8bc 100644 --- a/julia/src/initializer.jl +++ b/julia/src/initializer.jl @@ -143,6 +143,10 @@ NormalInitializer(; mu=0, sigma=0.01) = NormalInitializer(mu, sigma) _init_weight(i::NormalInitializer, name::Symbol, x::NDArray) = randn!(x, μ = i.μ, σ = i.σ) +@enum XavierDistribution xv_uniform xv_normal +@enum XavierRegularization xv_avg xv_in xv_out + + """ XavierInitializer @@ -161,10 +165,6 @@ used by various libraries. * [K. He, X. Zhang, S. Ren, and J. Sun 2015]: `mx.XavierInitializer(distribution = mx.xv_gaussian, regularization = mx.xv_in, magnitude = 2)` * caffe_avg: `mx.XavierInitializer(distribution = mx.xv_uniform, regularization = mx.xv_avg, magnitude = 3)` """ - -@enum XavierDistribution xv_uniform xv_normal -@enum XavierRegularization xv_avg xv_in xv_out - struct XavierInitializer <: AbstractInitializer distribution :: XavierDistribution regularization :: XavierRegularization diff --git a/julia/src/io.jl b/julia/src/io.jl index e5f43950754c..32f7fece7e41 100644 --- a/julia/src/io.jl +++ b/julia/src/io.jl @@ -24,13 +24,12 @@ The root type for all data provider. A data provider should implement the follow * [`provide_data`](@ref) * [`provide_label`](@ref) -As well as the Julia iterator interface (see [the Julia manual](http://docs.julialang.org/en/stable/manual/interfaces/)). +As well as the Julia iterator interface (see +[the Julia manual](https://docs.julialang.org/en/v1/manual/interfaces/#man-interface-iteration-1)). 
Normally this involves defining: * `Base.eltype(provider) -> AbstractDataBatch` -* `Base.start(provider) -> AbstractDataProviderState` -* `Base.done(provider, state) -> Bool` -* `Base.next(provider, state) -> (AbstractDataBatch, AbstractDataProvider)` +* `Base.iterate(provider[, state]) -> (AbstractDataBatch, AbstractDataProvider)` """ abstract type AbstractDataProvider end @@ -395,7 +394,11 @@ end Base.eltype(provider :: ArrayDataProvider) = ArrayDataProviderState -function Base.start(provider :: ArrayDataProvider) +struct ArrayDataBatch <: AbstractDataBatch + idx :: UnitRange{Int} +end + +function _start(provider::ArrayDataProvider) if provider.shuffle # re-shuffle all data idx_perm = randperm(provider.sample_count) @@ -406,13 +409,9 @@ function Base.start(provider :: ArrayDataProvider) return ArrayDataProviderState(1) end -Base.done(provider::ArrayDataProvider, state::ArrayDataProviderState) = - state.curr_idx > provider.sample_count - -struct ArrayDataBatch <: AbstractDataBatch - idx :: UnitRange{Int} -end -function Base.next(provider :: ArrayDataProvider, state :: ArrayDataProviderState) +function Base.iterate(provider::ArrayDataProvider, + state::ArrayDataProviderState = _start(provider)) + (state.curr_idx > provider.sample_count) && return nothing idx = state.curr_idx:Base.min(state.curr_idx+provider.batch_size-1, provider.sample_count) return (ArrayDataBatch(idx), ArrayDataProviderState(idx.stop+1)) end @@ -480,12 +479,12 @@ end function MXDataProvider(handle :: MX_DataIterHandle; data_name :: Symbol = :data, - label_name :: Union{Symbol,Void} = :softmax_label, + label_name :: Union{Symbol,Nothing} = :softmax_label, kwargs...) # for convenience, we ignore the rest keyword arguments # init iterator, load the first batch and get shapes @assert(_iter_next(handle), "Failed to load the first batch in MXDataProvider") data_shape = Tuple{Base.Symbol, Tuple}[(data_name, size(_get_data(handle)))] - if !isa(label_name, Void) + if !isa(label_name, Nothing) label_shape = Tuple{Base.Symbol, Tuple}[(label_name::Base.Symbol, size(_get_label(handle)))] else label_shape = Tuple{Base.Symbol, Tuple}[] @@ -504,10 +503,9 @@ end struct MXDataBatch <: AbstractDataBatch end -function Base.eltype(provider :: MXDataProvider) - MXDataBatch -end -function Base.start(provider :: MXDataProvider) +Base.eltype(::MXDataProvider) = MXDataBatch + +function _start(provider::MXDataProvider) if !provider.first_epoch _reset_data_iter(provider.handle) else @@ -516,7 +514,8 @@ function Base.start(provider :: MXDataProvider) return MXDataProviderState(true) end -function Base.done(provider :: MXDataProvider, state :: MXDataProviderState) + +function _done(provider::MXDataProvider, state::MXDataProviderState) if provider.first_batch state.has_next = true provider.first_batch = false @@ -525,8 +524,10 @@ function Base.done(provider :: MXDataProvider, state :: MXDataProviderState) end return !state.has_next end -function Base.next(provider :: MXDataProvider, state :: MXDataProviderState) - return (MXDataBatch(), state) + +function Base.iterate(provider::MXDataProvider, state::MXDataProviderState = _start(provider)) + _done(provider, state) && return nothing + MXDataBatch(), state end function get_data(provider :: MXDataProvider, batch :: MXDataBatch) @@ -574,7 +575,7 @@ function _populate_iter_creator_cache!() end end -_get_iter_creator(name :: Symbol) = _iter_creator_cache[name] +_get_iter_creator(name::Symbol) = _iter_creator_cache[name] function _define_data_iter_creator(hdr :: MX_handle) ref_name = Ref{char_p}(0) @@ 
-611,7 +612,7 @@ function _define_data_iter_creator(hdr :: MX_handle) end defun = quote - @doc $f_desc -> + @doc $f_desc function $iter_name(; kwargs...) arg_keys = String[string(k) for (k,v) in kwargs] arg_vals = String[dump_mx_param(v) for (k,v) in kwargs] diff --git a/julia/src/kvstore.jl b/julia/src/kvstore.jl index ac0367144384..000684d5f20d 100644 --- a/julia/src/kvstore.jl +++ b/julia/src/kvstore.jl @@ -45,10 +45,10 @@ For distributed training, `KVStore` also supports a number of types: """ mutable struct KVStore handle :: MX_KVStoreHandle - updater_c :: Ptr{Void} + updater_c :: Ptr{Cvoid} updater :: Function - KVStore(hdr::MX_KVStoreHandle) = new(hdr, Ptr{Void}(0)) + KVStore(hdr::MX_KVStoreHandle) = new(hdr, Ptr{Cvoid}(0)) end function KVStore(kv_type::Symbol = :local) @@ -251,7 +251,7 @@ barrier(kv::KVStore) = @mxcall(:MXKVStoreBarrier, (MX_handle,), kv) # extra handle parameter of the API to pass the updater object around. Fix this when someday # full closure cfunction is supported in Julia. function _kvstore_update_wrapper(key::Cint, nd_recv::MX_handle, nd_local::MX_handle, - updater::Ptr{Void}) + updater::Ptr{Cvoid}) updater_func = unsafe_pointer_to_objref(updater) updater_func(Int(key), NDArray(MX_NDArrayHandle(nd_recv)), NDArray(MX_NDArrayHandle(nd_local))) @@ -291,9 +291,9 @@ julia> x """ function setupdater!(kv::KVStore, updater) kv.updater = updater # keep a reference to the julia object so that updater_c is kept valid - kv.updater_c = cfunction(_kvstore_update_wrapper, Void, - (Cint, MX_handle, MX_handle, Ptr{Void})) - @mxcall(:MXKVStoreSetUpdater, (MX_handle, Ptr{Void}, Any), + kv.updater_c = @cfunction(_kvstore_update_wrapper, Cvoid, + (Cint,MX_handle,MX_handle,Ptr{Cvoid})) + @mxcall(:MXKVStoreSetUpdater, (MX_handle, Ptr{Cvoid}, Any), kv, kv.updater_c, updater) end @@ -336,7 +336,7 @@ julia> W ``` """ function setoptimizer!(kv::KVStore, opt::AbstractOptimizer) - if ismatch(r"dist", string(get_type(kv))) && _isworker() + if occursin(r"dist", string(get_type(kv))) && _isworker() # TODO error("not implemented") else diff --git a/julia/src/metric.jl b/julia/src/metric.jl index 772eb3b3e680..f1cdc68d947f 100644 --- a/julia/src/metric.jl +++ b/julia/src/metric.jl @@ -47,16 +47,16 @@ Update and accumulate metrics. * `labels::Vector{NDArray}`: the labels from the data provider. * `preds::Vector{NDArray}`: the outputs (predictions) of the network. """ -function update!(metric::T, labels::VecOfNDArray, preds::VecOfNDArray) where T <: AbstractEvalMetric +update!(metric::T, labels::VecOfNDArray, preds::VecOfNDArray) where T<:AbstractEvalMetric = _update!(metric, labels, preds, hasNDArraySupport(metric)) -end function _update!(metric::T, labels::VecOfNDArray, preds::VecOfNDArray, - ::Val{true}) where T<: AbstractEvalMetric + ::Val{true}) where T<:AbstractEvalMetric if length(labels) != length(preds) - Base.warn_once( - "The number of labels ($(length(labels))) does not correspond to the\ - number of outputs ($(length(preds))). The calculated metric might not be accuracte.") + @warn( + "The number of labels ($(length(labels))) does not correspond to the " * + "number of outputs ($(length(preds))). 
The calculated metric might not be accuracte.", + maxlog = 1) end for (label, pred) in zip(labels, preds) _update_single_output(metric, label, pred) @@ -64,11 +64,12 @@ function _update!(metric::T, labels::VecOfNDArray, preds::VecOfNDArray, end function _update!(metric::T, labels::VecOfNDArray, preds::VecOfNDArray, - ::Val{false}) where T<: AbstractEvalMetric + ::Val{false}) where {T<:AbstractEvalMetric} if length(labels) != length(preds) - Base.warn_once( - "The number of labels ($(length(labels))) does not correspond to the\ - number of outputs ($(length(preds))). The calculated metric might not be accuracte.") + @warn( + "The number of labels ($(length(labels))) does not correspond to the " * + "number of outputs ($(length(preds))). The calculated metric might not be accuracte.", + maxlog = 1) end for (label, pred) in zip(labels, preds) @nd_as_jl ro=(label, pred) begin @@ -124,19 +125,19 @@ To calculate both mean-squared error [`Accuracy`](@ref) and log-loss [`ACE`](@re ``` """ mutable struct MultiMetric <: AbstractEvalMetric - metrics :: Vector{mx.AbstractEvalMetric} + metrics :: Vector{mx.AbstractEvalMetric} end function update!(metric :: MultiMetric, labels :: Vector{<:NDArray}, preds :: Vector{<:NDArray}) - for m in metric.metrics - update!(m, labels, preds) - end - nothing + for m in metric.metrics + update!(m, labels, preds) + end + nothing end function reset!(metric :: MultiMetric) - map(reset!, metric.metrics) - nothing + map(reset!, metric.metrics) + nothing end get(metric::MultiMetric) = mapreduce(get, append!, metric.metrics) @@ -154,21 +155,21 @@ and log-loss [`ACE`](@ref) for the second output: ``` """ mutable struct SeqMetric <: AbstractEvalMetric - metrics :: Vector{AbstractEvalMetric} + metrics :: Vector{AbstractEvalMetric} end function update!(metric::SeqMetric, labels::VecOfNDArray, preds::VecOfNDArray) - @assert length(metric.metrics) == length(labels) - @assert length(metric.metrics) == length(preds) - for (m, l, p) in zip(metric.metrics, labels, preds) - update!(m, [l], [p]) - end - nothing + @assert length(metric.metrics) == length(labels) + @assert length(metric.metrics) == length(preds) + for (m, l, p) in zip(metric.metrics, labels, preds) + update!(m, [l], [p]) + end + nothing end function reset!(metric::SeqMetric) - map(reset!, metric.metrics) - nothing + map(reset!, metric.metrics) + nothing end get(metric::SeqMetric) = mapreduce(get, append!, metric.metrics) @@ -204,7 +205,7 @@ function _update_single_output(metric::Accuracy, label::Array, pred::Array) for i in 1:size(labels, 1) label = labels[i, j, 1, sample] klasses = view(pred, i, j, :, sample) - klass = indmax(klasses) - 1 # Classes start at 0...k-1 + klass = argmax(klasses) - 1 # Classes start at 0...k-1 metric.acc_sum += klass == label metric.n_sample += 1 @@ -213,7 +214,7 @@ function _update_single_output(metric::Accuracy, label::Array, pred::Array) end elseif ndims(pred) == 2 # 1-dimensional case for sample in 1:size(label, 1) - klass = indmax(view(pred, :, sample)) - 1 + klass = argmax(view(pred, :, sample)) - 1 metric.acc_sum += klass == label[sample] metric.n_sample += 1 end @@ -237,7 +238,6 @@ Mean Squared Error. Calculates the mean squared error regression loss. Requires that label and prediction have the same shape. 
""" - mutable struct MSE{N} <: AbstractEvalMetric mse_sum :: Vector{NDArray{MX_float,N}} n_sample :: Int @@ -269,7 +269,7 @@ function reset!(metric::MSE{N}) where N metric.n_sample = 0 end -doc""" +@doc doc""" NMSE Normalized Mean Squared Error diff --git a/julia/src/model.jl b/julia/src/model.jl index 109cb35e38a6..cb5f95e3c1eb 100644 --- a/julia/src/model.jl +++ b/julia/src/model.jl @@ -38,7 +38,7 @@ mutable struct FeedForward <: AbstractModel arg_params :: Dict{Symbol} aux_params :: Dict{Symbol} - pred_exec :: Union{Executor,Void} + pred_exec :: Union{Executor,Cvoid} # leave the rest fields undefined FeedForward(arch::SymbolicNode, ctx::Vector{Context}) = new(arch, ctx) @@ -53,9 +53,9 @@ piece. function _split_inputs(batch_size::Int, n_split::Int) @assert(batch_size >= n_split) per_split = floor(Int, batch_size / n_split) - counts = Base.zeros(Int, n_split)+per_split + counts = Base.zeros(Int, n_split) .+ per_split extra = batch_size - Base.sum(counts) - counts[1:extra] += 1 + counts[1:extra] .+= 1 cum = [0, cumsum(counts)...] idx = [cum[i-1]+1:cum[i] for i = 2:length(cum)] @@ -118,7 +118,7 @@ function init_model(self::FeedForward, initializer::AbstractInitializer; overwri arg_params[name] = self.arg_params[name] continue else - warn("Shape mismatch for $name. Overwriting with new one.") + @warn("Shape mismatch for $name. Overwriting with new one.") delete!(self.arg_params, name) end end @@ -131,7 +131,7 @@ function init_model(self::FeedForward, initializer::AbstractInitializer; overwri aux_params[name] = self.aux_params[name] continue else - warn("Shape mismatch for $name. Overwriting with new one.") + @warn("Shape mismatch for $name. Overwriting with new one.") delete!(self.aux_params, name) end end @@ -156,7 +156,7 @@ function init_model(self::FeedForward, initializer::AbstractInitializer; overwri end function _setup_predictor(self::FeedForward, overwrite::Bool=false; verbosity::Integer = 1, data_shapes...) - if !isdefined(self, :pred_exec) || isa(self.pred_exec, Void) || overwrite + if !isdefined(self, :pred_exec) || isa(self.pred_exec, Cvoid) || overwrite if !isdefined(self, :arg_params) || !isdefined(self, :aux_params) @assert(false, "Model weights not defined, please init or train the model, or load from file") end @@ -164,7 +164,7 @@ function _setup_predictor(self::FeedForward, overwrite::Bool=false; verbosity::I # the predictor use only the first device self.pred_exec = simple_bind(self.arch, self.ctx[1]; grad_req=GRAD_NOP, data_shapes...) dbg_str = mx.debug_str(self.pred_exec) - verbosity >= 1 && info(string("TempSpace: ", split(dbg_str, ['\n'])[end-2]..., " on ", self.ctx[1])) + verbosity >= 1 && @info(string("TempSpace: ", split(dbg_str, ['\n'])[end-2]..., " on ", self.ctx[1])) copy_params_from(self.pred_exec, self.arg_params, self.aux_params) else # make sure the new setup is compatible with the existing one @@ -224,7 +224,7 @@ function predict(callback::Function, self::FeedForward, data::AbstractDataProvid predict(self, data; overwrite = overwrite, callback=callback, verbosity = verbosity) end function predict(self::FeedForward, data::AbstractDataProvider; - overwrite::Bool = true, callback::Union{Function,Void}=nothing, verbosity::Integer = 1) + overwrite::Bool = true, callback::Union{Function,Cvoid}=nothing, verbosity::Integer = 1) data_shapes = provide_data(data) data_names = [x[1] for x in data_shapes] _setup_predictor(self, overwrite; verbosity = verbosity, data_shapes...) 
@@ -235,7 +235,7 @@ function predict(self::FeedForward, data::AbstractDataProvider; for batch in eachbatch(data) load_data!(data, batch, data_arrays) forward(self.pred_exec, is_train=false) - if isa(callback, Void) + if isa(callback, Cvoid) # no callback, accumulate the data and return at the end for (o_list, o_nd) in zip(output_list, self.pred_exec.outputs) push!(o_list, copy(slice(o_nd, 1:count_samples(data, batch)))) @@ -249,7 +249,7 @@ function predict(self::FeedForward, data::AbstractDataProvider; end end - if !isa(callback, Void) + if !isa(callback, Cvoid) # callback exists, do not accumulate data return nothing end @@ -264,7 +264,7 @@ function predict(self::FeedForward, data::AbstractDataProvider; end # concatenate along mini-batches - output_arrays = [cat(ndims(x[1]), x...) for x in output_list] + output_arrays = [cat(x..., dims = ndims(x[1])) for x in output_list] if length(output_arrays) == 1 # only 1 output, return it directly, instead of a list output_arrays = output_arrays[1] @@ -279,7 +279,7 @@ function _init_model(self::FeedForward, data::AbstractDataProvider, end function _create_kvstore(kv_type::Symbol, num_device::Int, arg_params::Dict{Symbol}, verbosity::Int) - if num_device == 1 && !ismatch(r"dist", string(kv_type)) + if num_device == 1 && !occursin(r"dist", string(kv_type)) return nothing else if kv_type == :local @@ -289,7 +289,7 @@ function _create_kvstore(kv_type::Symbol, num_device::Int, arg_params::Dict{Symb else kv_type = :local_allreduce_cpu end - verbosity >= 2 && info("Auto-select kvstore type = $kv_type") + verbosity >= 2 && @info("Auto-select kvstore type = $kv_type") end return KVStore(kv_type) end @@ -298,7 +298,7 @@ end @defstruct TrainingOptions ( initializer :: AbstractInitializer = UniformInitializer(0.01), n_epoch :: Int = 10, - eval_data :: Union{Void,AbstractDataProvider} = nothing, + eval_data :: Union{Cvoid,AbstractDataProvider} = nothing, eval_metric :: AbstractEvalMetric = Accuracy(), kvstore :: Union{Symbol,KVStore} = :local, force_init :: Bool = false, @@ -364,25 +364,25 @@ function fit(self::FeedForward, optimizer::AbstractOptimizer, data::AbstractData kwargs...) opts = TrainingOptions(; kwargs...) 
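As a side note, here is a standalone sketch of the batch-splitting arithmetic done by `_split_inputs` in the model.jl hunk above; the point is the explicit broadcast dots (`.+`, `.+=`) that Julia 0.7+ requires for the scalar/array updates. This is an illustration under simplified assumptions, not the package function.

```julia
function split_batch(batch_size::Int, n_split::Int)
    per_split = fld(batch_size, n_split)
    counts    = fill(per_split, n_split)
    extra     = batch_size - sum(counts)
    counts[1:extra] .+= 1                        # dotted in-place update
    cum = [0; cumsum(counts)]
    [cum[i-1]+1:cum[i] for i in 2:length(cum)]   # one index range per device
end

split_batch(10, 3)   # => [1:4, 5:7, 8:10]
```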
- opts.verbosity >= 1 && info("Start training on $(self.ctx)") + opts.verbosity >= 1 && @info("Start training on $(self.ctx)") batch_size = get_batch_size(data) num_dev = length(self.ctx) slices = _split_inputs(batch_size, num_dev) # initialize parameters - opts.verbosity >= 2 && info("Initializing parameters...") + opts.verbosity >= 2 && @info("Initializing parameters...") arg_names, param_names, aux_names = _init_model(self, data, opts.initializer, opts.force_init) # setup kvstore kvstore = opts.kvstore if isa(kvstore, Symbol) - opts.verbosity >= 2 && info("Creating KVStore...") + opts.verbosity >= 2 && @info("Creating KVStore...") kvstore = _create_kvstore(kvstore, length(self.ctx), self.arg_params, opts.verbosity) end update_on_kvstore = true - if isa(kvstore, Void) || ismatch(r"local_allreduce", string(get_type(kvstore))) + if isa(kvstore, Cvoid) || occursin(r"local_allreduce", string(get_type(kvstore))) update_on_kvstore = false end @@ -407,13 +407,13 @@ function fit(self::FeedForward, optimizer::AbstractOptimizer, data::AbstractData end end - train_execs = Array{Executor}(num_dev) + train_execs = Array{Executor}(undef, num_dev) for i = 1:num_dev data_shapes = Dict(map((x) -> x[1] => tuple(x[2][1:end-1]...,length(slices[i])), provide_data(data))) label_shapes = Dict(map((x) -> x[1] => tuple(x[2][1:end-1]...,length(slices[i])), provide_label(data))) train_execs[i] = simple_bind(self.arch, self.ctx[i]; grad_req=grad_req, data_shapes..., label_shapes...) dbg_str = mx.debug_str(train_execs[i]) - opts.verbosity >= 2 && info(string("TempSpace: ", split(dbg_str, ['\n'])[end-2]..., " on ", self.ctx[i])) + opts.verbosity >= 2 && @info(string("TempSpace: ", split(dbg_str, ['\n'])[end-2]..., " on ", self.ctx[i])) copy_params_from(train_execs[i], self.arg_params, self.aux_params) end @@ -441,12 +441,12 @@ function fit(self::FeedForward, optimizer::AbstractOptimizer, data::AbstractData updater = getupdater(optimizer) end - if !isa(kvstore, Void) + if !isa(kvstore, Cvoid) if update_on_kvstore set_optimizer(kvstore, optimizer) end - opts.verbosity >= 2 && info("Initializing KVStore...") + opts.verbosity >= 2 && @info("Initializing KVStore...") # init kv with gradients for idx = 1:length(param_arrays) param_on_devs = param_arrays[idx] @@ -469,7 +469,7 @@ function fit(self::FeedForward, optimizer::AbstractOptimizer, data::AbstractData # invoke callbacks on epoch 0 _invoke_callbacks(self, opts.callbacks, op_state, AbstractEpochCallback) - opts.verbosity >= 2 && info("Start training...") + opts.verbosity >= 2 && @info("Start training...") for i_epoch = 1:opts.n_epoch time_start = time() reset!(opts.eval_metric) @@ -506,7 +506,7 @@ function fit(self::FeedForward, optimizer::AbstractOptimizer, data::AbstractData end # gradient synchronization - if !isa(kvstore, Void) + if !isa(kvstore, Cvoid) # push gradient, priority is negative index push!(kvstore, idx, grad_arrays[idx], priority=-idx) if update_on_kvstore @@ -543,17 +543,17 @@ function fit(self::FeedForward, optimizer::AbstractOptimizer, data::AbstractData time_stop = time() metric = get(opts.eval_metric) - opts.verbosity >= 2 && info(format("== Epoch {1:0>3d}/{2:0>3d} ==========", i_epoch, opts.n_epoch)) + opts.verbosity >= 2 && @info(format("== Epoch {1:0>3d}/{2:0>3d} ==========", i_epoch, opts.n_epoch)) if opts.verbosity >= 3 - info("## Training summary") + @info("## Training summary") for (name, value) in metric - info(format("{1:>18s} = {2:.4f}", string(name), value)) + @info(format("{1:>18s} = {2:.4f}", string(name), value)) end - 
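Two of the smaller spelling changes in the fit() hunks above, shown standalone with throwaway values (`Any` stands in for `Executor` here): uninitialized arrays now take `undef`, and `ismatch(regex, str)` is spelled `occursin(regex, str)`.

```julia
execs = Vector{Any}(undef, 3)    # Julia 0.6 spelling was Array{Any}(3)
for i in 1:3
    execs[i] = i^2               # fill the slots in, as the training loop does per device
end
execs                            # => Any[1, 4, 9]

occursin(r"dist", "dist_sync")   # => true; 0.6 spelling was ismatch(r"dist", "dist_sync")
```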
info(format("{1:>18s} = {2:.4f} seconds", "time", time_stop-time_start)) + @info(format("{1:>18s} = {2:.4f} seconds", "time", time_stop-time_start)) end # evaluation on validation set - if !isa(opts.eval_data, Void) + if !isa(opts.eval_data, Cvoid) # because we are re-using the memory allocated for the training network, # the batch_size of the validation dataset must be the same as the training # batch_size @@ -577,9 +577,9 @@ function fit(self::FeedForward, optimizer::AbstractOptimizer, data::AbstractData end if opts.verbosity >= 3 - info("## Validation summary") + @info("## Validation summary") for (name, value) in get(opts.eval_metric) - info(format("{1:>18s} = {2:.4f}", string(name), value)) + @info(format("{1:>18s} = {2:.4f}", string(name), value)) end end end @@ -603,7 +603,7 @@ function fit(self::FeedForward, optimizer::AbstractOptimizer, data::AbstractData _invoke_callbacks(self, opts.callbacks, op_state, AbstractEpochCallback; metric=metric) end # end of all epochs - opts.verbosity >= 1 && info("Finish training on $(self.ctx)") + opts.verbosity >= 1 && @info("Finish training on $(self.ctx)") nothing end @@ -613,13 +613,15 @@ save_checkpoint(self::FeedForward, prefix::AbstractString, state::OptimizationSt function save_checkpoint(sym::SymbolicNode, arg_params::Dict{Symbol}, aux_params::Dict{Symbol}, prefix::AbstractString, epoch::Int) save("$prefix-symbol.json", sym) - save_dict = Dict{Symbol, NDArray}(map((x) -> Symbol("arg:$(x[1])") => x[2], arg_params)) + save_dict = Dict{Symbol,NDArray}( + Symbol("arg:$(x[1])") => x[2] for x in arg_params + ) if !isempty(aux_params) merge!(save_dict, Dict(map((x) -> Symbol("aux:$(x[1])") => x[2], aux_params))) end save_filename = format("{1}-{2:04d}.params", prefix, epoch) save(save_filename, save_dict) - info("Saved checkpoint to '$save_filename'") + @info("Saved checkpoint to '$save_filename'") end function load_checkpoint(prefix::AbstractString, epoch::Int) @@ -656,7 +658,7 @@ end function load_checkpoint(self::FeedForward, prefix::AbstractString, epoch::Int; overwrite::Bool = true, allow_different_arch::Bool = false) if isdefined(self, :arg_params) && isdefined(self, :aux_params) && !overwrite - info("model weights already exists, skip loading... (call with overwrite=true if needed)") + @info("model weights already exists, skip loading... 
(call with overwrite=true if needed)") return self end diff --git a/julia/src/ndarray.jl b/julia/src/ndarray.jl index 9e47150a1a00..dad9b59e8210 100644 --- a/julia/src/ndarray.jl +++ b/julia/src/ndarray.jl @@ -61,22 +61,22 @@ function fromTypeFlag(T::TypeFlag) end # create a NDArray handle of specific shape -function _ndarray_alloc(shape :: NTuple{N, Int}, ctx :: Context, delay_alloc :: Bool) where N +function _ndarray_alloc(shape::NTuple{N,Int}, ctx::Context, delay_alloc::Bool) where N h_ref = Ref{MX_handle}(0) - shape = flipdim(MX_uint[shape...],1) + shape = collect(reverse(MX_uint.(shape))) @mxcall(:MXNDArrayCreate, (Ptr{MX_uint}, MX_uint, Cint, Cint, Cint, Ref{MX_handle}), - shape, length(shape), ctx.device_type, ctx.device_id, delay_alloc, h_ref) + shape, N, ctx.device_type, ctx.device_id, delay_alloc, h_ref) handle = MX_NDArrayHandle(h_ref[]) return handle end # create a NDArray handle of specific shape type -function _ndarray_alloc(:: Type{T}, shape :: NTuple{N, Int}, ctx :: Context, delay_alloc :: Bool) where {T <: DType,N} +function _ndarray_alloc(::Type{T}, shape::NTuple{N,Int}, ctx::Context, delay_alloc::Bool) where {T<:DType,N} h_ref = Ref{MX_handle}(0) - shape = flipdim(MX_uint[shape...],1) + shape = collect(reverse(MX_uint.(shape))) dtype = toTypeFlag(T) @mxcall(:MXNDArrayCreateEx, (Ptr{MX_uint}, MX_uint, Cint, Cint, Cint, Cint, Ref{MX_handle}), - shape, length(shape), ctx.device_type, ctx.device_id, delay_alloc, dtype, h_ref) + shape, N, ctx.device_type, ctx.device_id, delay_alloc, dtype, h_ref) handle = MX_NDArrayHandle(h_ref[]) return handle end @@ -113,51 +113,53 @@ mutable struct NDArray{T,N} NDArray{T,N}(handle, writable = true) where {T,N} = new(handle, writable) end -NDArray(x::AbstractArray{T}) where {T<:DType} = copy(collect(x), cpu()) -NDArray(x::Array{T}) where {T<:DType} = copy(x, cpu()) +NDArray(x::AbstractArray{<:DType}) = copy(collect(x), cpu()) +NDArray(x::Array{<:DType}) = copy(x, cpu()) NDArray(::Type{T}, x::AbstractArray) where {T<:DType} = copy(convert(AbstractArray{T}, x), cpu()) NDArray(handle, writable = true) = NDArray{eltype(handle), ndims(handle)}(handle, writable) # type aliases -const NDArrayOrReal = Union{NDArray, Real} +const NDArrayOrReal = Union{NDArray,Real} const VecOfNDArray = AbstractVector{<:NDArray} -@unfuse NDArray - function Base.show(io::IO, x::NDArray) - print(io, "NDArray ") - Base.showarray(io, try_get_shared(x, sync = :read), header = false) + print(io, "NDArray(") + Base.show(io, try_get_shared(x, sync = :read)) + print(io, ")") end # for REPL -function Base.show(io::IO, ::MIME{Symbol("text/plain")}, x::NDArray{T, N}) where {T, N} +function Base.show(io::IO, ::MIME{Symbol("text/plain")}, x::NDArray{T,N}) where {T,N} type_ = split(string(typeof(x)), '.', limit=2)[end] - size_ = N == 1 ? "$(length(x))-element" : join(size(x), "×") - println(io, "$size_ $type_ @ $(context(x)):") - Base.showarray(io, try_get_shared(x, sync = :read), false, header = false) + n = length(x) + size_ = N == 1 ? "$n-element" : join(size(x), "×") + print(io, "$size_ $type_ @ $(context(x))", (n == 0) ? 
"" : ":\n") + Base.print_array(io, try_get_shared(x, sync = :read)) end -Base.unsafe_convert(::Type{MX_handle}, obj::NDArray) = - Base.unsafe_convert(MX_handle, obj.handle) -Base.convert(T::Type{MX_handle}, obj::NDArray) = Base.unsafe_convert(T, obj) -Base.cconvert(T::Type{MX_handle}, obj::NDArray) = Base.unsafe_convert(T, obj) +Base.unsafe_convert(::Type{MX_handle}, x::NDArray) = + Base.unsafe_convert(MX_handle, x.handle) +Base.convert(T::Type{MX_handle}, x::NDArray) = Base.unsafe_convert(T, x) +Base.cconvert(T::Type{MX_handle}, x::NDArray) = Base.unsafe_convert(T, x) + +MX_handle(x::NDArray) = Base.convert(MX_handle, x) ################################################################################ # NDArray functions exported to the users ################################################################################ """ - context(arr::NDArray) + context(x::NDArray) Get the context that this `NDArray` lives on. """ -function context(arr::NDArray) +function context(x::NDArray) ref_typeid = Ref{Cint}(0) ref_devid = Ref{Cint}(0) @mxcall(:MXNDArrayGetContext, (MX_handle, Ref{Cint}, Ref{Cint}), - arr, ref_typeid, ref_devid) - return Context(ref_typeid[], ref_devid[]) + x, ref_typeid, ref_devid) + Context(ref_typeid[], ref_devid[]) end """ @@ -168,7 +170,7 @@ end Allocate memory for an uninitialized `NDArray` with a specified type. """ empty(::Type{T}, dims::NTuple{N,Int}, ctx::Context = cpu()) where {N,T<:DType} = - NDArray{T, N}(_ndarray_alloc(T, dims, ctx, false)) + NDArray{T,N}(_ndarray_alloc(T, dims, ctx, false)) empty(::Type{T}, dims::Int...) where {T<:DType} = empty(T, dims) """ @@ -198,9 +200,9 @@ Base.similar(x::NDArray{T}) where {T} = empty(T, size(x), context(x)) Create zero-ed `NDArray` with specific shape and type. """ function zeros(::Type{T}, dims::NTuple{N,Int}, ctx::Context = cpu()) where {N,T<:DType} - arr = empty(T, dims, ctx) - arr[:] = zero(T) - arr + x = empty(T, dims, ctx) + x[:] = zero(T) + x end zeros(::Type{T}, dims::Int...) where {T<:DType} = zeros(T, dims) @@ -234,32 +236,24 @@ ones(dims::Int...) = ones(dims) ones(x::NDArray)::typeof(x) = ones_like(x) Base.ones(x::NDArray)::typeof(x) = ones_like(x) -import Base: size, length, ndims, eltype +import Base: length, ndims """ size(x::NDArray) - size(x::NDArray, dims...) + size(x::NDArray, dims) Get the shape of an `NDArray`. The shape is in Julia's column-major convention. See also the notes on NDArray shapes [`NDArray`](@ref). """ -function size(x::NDArray) +function Base.size(x::NDArray) ref_ndim = Ref{MX_uint}(0) ref_shape = Ref{Ptr{MX_uint}}(0) @mxcall(:MXNDArrayGetShape, (MX_handle, Ref{MX_uint}, Ref{Ptr{MX_uint}}), x, ref_ndim, ref_shape) - tuple(map(Int, flipdim(unsafe_wrap(Array, ref_shape[], ref_ndim[]),1))...) + tuple(map(Int, reverse(unsafe_wrap(Array, ref_shape[], ref_ndim[])))...) end -function size(x::NDArray{T,N}, dim::Int) where {T,N} - if dim > N - 1 - else - size(x)[dim] - end -end - -size(x::NDArray, dims::Int...) = map(d -> size(x, d), dims) +Base.size(x::NDArray{T,N}, dims::Integer) where {T,N} = (dims > N) ? 1 : size(x)[dims] """ length(x::NDArray) @@ -289,25 +283,23 @@ end Get the element type of an `NDArray`. 
""" -function eltype(x::Union{NDArray, MX_NDArrayHandle}) +function Base.eltype(x::Union{NDArray,MX_NDArrayHandle}) dtype_ref = Ref{Cint}(0) @mxcall(:MXNDArrayGetDType, (MX_handle, Ptr{Cint}), x, dtype_ref) if dtype_ref[] == -1 # x->is_none() - warn("Eltype of $x is not defined") - Base.show_backtrace(STDOUT, backtrace()) - println() - Float32 - else - fromTypeFlag(TypeFlag(dtype_ref[])) + # TODO: unit test for this branch + throw(MXError("Eltype of $x is not defined")) end + + fromTypeFlag(TypeFlag(dtype_ref[])) end @inline _first(x::NDArray) = try_get_shared(x, sync = :read) |> first Base.first(x::NDArray) = _first(x) -Base.endof(x::NDArray) = length(x) +Base.lastindex(x::NDArray) = length(x) """ slice(arr :: NDArray, start:stop) @@ -456,7 +448,7 @@ Copy contents of `src` into `dst`. function copy!(dst::NDArray, src::NDArray) @assert(dst.writable) if dst.handle == src.handle - warn("Copying an NDArray to itself") + @warn("Copying an NDArray to itself") return end @@ -466,7 +458,7 @@ end function copy!(dst::Array{T}, src::NDArray{T}) where T<:DType @assert size(dst) == size(src) - @mxcall(:MXNDArraySyncCopyToCPU, (MX_handle, Ptr{Void}, Csize_t), + @mxcall(:MXNDArraySyncCopyToCPU, (MX_handle, Ptr{Cvoid}, Csize_t), src, pointer(dst), length(dst)) dst end @@ -478,7 +470,7 @@ function copy!(dst::NDArray{T}, src::Array{<:Real}) where {T} @assert dst.writable @assert size(dst) == size(src) src = convert(Array{T}, src) # this might involve copying - @mxcall(:MXNDArraySyncCopyFromCPU, (MX_handle, Ptr{Void}, Csize_t), + @mxcall(:MXNDArraySyncCopyFromCPU, (MX_handle, Ptr{Cvoid}, Csize_t), dst.handle, pointer(src), length(src)) dst end @@ -487,7 +479,7 @@ function copy_ignore_shape!(dst::NDArray{T}, src::Array{<:Real}) where {T} @assert dst.writable @assert length(dst) == length(src) src = convert(Array{T}, src) # this might involve copying - @mxcall(:MXNDArraySyncCopyFromCPU, (MX_handle, Ptr{Void}, Csize_t), + @mxcall(:MXNDArraySyncCopyFromCPU, (MX_handle, Ptr{Cvoid}, Csize_t), dst.handle, pointer(src), length(src)) dst end @@ -501,8 +493,10 @@ end Create a copy of an array. When no `Context` is given, create a Julia `Array`. Otherwise, create an `NDArray` on the specified context. """ +copy + # Create copy: NDArray -> Julia Array -copy(x::NDArray{T,D}) where{T,D} = copy!(Array{T,D}(size(x)), x) +copy(x::NDArray{T,D}) where{T,D} = copy!(Array{T,D}(undef, size(x)), x) # Create copy: NDArray -> NDArray in a given context copy(x::NDArray{T,D}, ctx::Context) where {T,D} = @@ -538,27 +532,27 @@ end """ hcat(x::NDArray...) """ -Base.hcat(xs::NDArray{T}...) where T = cat(2, xs...) +Base.hcat(xs::NDArray{T}...) where T = cat(xs..., dims = 2) """ vcat(x::NDArray...) """ -Base.vcat(xs::NDArray{T}...) where T = cat(1, xs...) +Base.vcat(xs::NDArray{T}...) where T = cat(xs..., dims = 1) """ - cat(dim, xs::NDArray...) + cat(xs::NDArray...; dims) -Concate the `NDArray`s which have the same element type along the `dim`. +Concate the `NDArray`s which have the same element type along the `dims`. Building a diagonal matrix is not supported yet. """ -function Base.cat(dim::Int, xs::NDArray{T}...) where T +function Base.cat(xs::NDArray{T}...; dims) where T ns = ndims.(xs) - d = Base.max(dim, maximum(ns)) + d = Base.max(dims, maximum(ns)) xs′ = map(zip(ns, xs)) do i n, x = i (d > n) ? reshape(x, -2, Base.ones(Int, d - n)...) : x end - concat(xs′..., dim = d - dim) + concat(xs′..., dim = d - dims) end """ @@ -633,11 +627,7 @@ added together. 
Note at least the first or second argument needs to be an +(x::NDArray, y::Real) = _plus_scalar(x, scalar = y) +(y::Real, x::NDArray) = _plus_scalar(x, scalar = y) -broadcast_(::typeof(+), x::NDArray, y::Real) = x + y -broadcast_(::typeof(+), x::Real, y::NDArray) = x + y - -broadcast_(::typeof(+), x::NDArray{T,N}, y::NDArray{T,N}) where {T,N} = x + y -broadcast_(::typeof(+), x::NDArray{T,N}, y::NDArray{T,M}) where {T,N,M} = +broadcasted(::typeof(+), x::NDArray{T,N}, y::NDArray{T,M}) where {T,N,M} = _broadcast_add(x, y) """ @@ -665,16 +655,12 @@ import Base: - Subtraction `x - y`, of scalar types or `NDArray`. Or create the negative of `x`. """ --(x::NDArray) = _mul_scalar(x, scalar = -one(eltype(x))) +-(x::NDArray) = _mul_scalar(x, scalar = -one(eltype(x))) -(x::NDArray, y::NDArray) = _minus(x, y) -(x::NDArray, y::Real) = _minus_scalar(x, scalar = y) -(y::Real, x::NDArray) = _rminus_scalar(x, scalar = y) -broadcast_(::typeof(-), x::NDArray, y::Real) = x - y -broadcast_(::typeof(-), x::Real, y::NDArray) = x - y - -broadcast_(::typeof(-), x::NDArray{T,N}, y::NDArray{T,N}) where {T,N} = x - y -broadcast_(::typeof(-), x::NDArray{T,N}, y::NDArray{T,M}) where {T,N,M} = +broadcasted(::typeof(-), x::NDArray{T,N}, y::NDArray{T,M}) where {T,N,M} = _broadcast_minus(x, y) """ @@ -703,12 +689,9 @@ Elementwise multiplication for `NDArray`. *(x::NDArray, y::Real) = _mul_scalar(x, scalar = y) *(y::Real, x::NDArray) = _mul_scalar(x, scalar = y) -broadcast_(::typeof(*), x::NDArray, y::Real) = x * y -broadcast_(::typeof(*), y::Real, x::NDArray) = x * y - -broadcast_(::typeof(*), x::NDArray{T,N}, y::NDArray{T,N}) where {T,N} = +broadcasted(::typeof(*), x::NDArray{T,N}, y::NDArray{T,N}) where {T,N} = _mul(x, y) -broadcast_(::typeof(*), x::NDArray{T,N}, y::NDArray{T,M}) where {T,N,M} = +broadcasted(::typeof(*), x::NDArray{T,N}, y::NDArray{T,M}) where {T,N,M} = _broadcast_mul(x, y) """ @@ -718,6 +701,9 @@ Matrix/tensor multiplication. """ *(x::NDArray{T}, y::NDArray{T}) where T = x ⋅ y +LinearAlgebra.adjoint(x::NDArray{T,1}) where T = transpose(x) +LinearAlgebra.adjoint(x::NDArray{T,2}) where T = transpose(x) + """ div_from!(dst::NDArray, arg::NDArrayOrReal) @@ -767,15 +753,13 @@ of the same shape. """ /(x::NDArray, y::Real) = _div_scalar(x, scalar = y) -broadcast_(::typeof(/), x::NDArray, y::Real) = _div_scalar(x, scalar = y) -broadcast_(::typeof(/), y::Real, x::NDArray) = _rdiv_scalar(x, scalar = y) - -broadcast_(::typeof(/), x::NDArray{T,N}, y::NDArray{T,N}) where {T,N} = +broadcasted(::typeof(/), y::Real, x::NDArray) = _rdiv_scalar(x, scalar = y) +broadcasted(::typeof(/), x::NDArray{T,N}, y::NDArray{T,N}) where {T,N} = _div(x, y) -broadcast_(::typeof(/), x::NDArray{T,N}, y::NDArray{T,M}) where {T,N,M} = +broadcasted(::typeof(/), x::NDArray{T,N}, y::NDArray{T,M}) where {T,N,M} = _broadcast_div(x, y) -function broadcast_(::typeof(/), x::NDArray{T}, y::Real) where {T<:Integer} +function broadcasted(::typeof(/), x::NDArray{T}, y::Real) where {T<:Integer} @assert(round(T, y) != zero(T), "Integer divided by zero") _div_scalar(x, scalar = y) end @@ -807,52 +791,50 @@ import Base: % Elementwise modulo for `NDArray`. 
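The old `broadcast_`/`@unfuse` machinery is replaced above by methods on `Base.Broadcast.broadcasted`, which is what a dot call such as `x .+ y` lowers to in Julia 0.7+. A minimal standalone sketch of that hook with a throwaway wrapper type (`Wrapped` is illustrative and not part of MXNet.jl):

```julia
import Base.Broadcast: broadcasted

struct Wrapped
    data::Vector{Float64}
end

# Intercept `x .+ y` for the wrapper and route it to a custom "kernel".
broadcasted(::typeof(+), x::Wrapped, y::Wrapped) = Wrapped(x.data .+ y.data)

a = Wrapped([1.0, 2.0])
b = Wrapped([3.0, 4.0])
(a .+ b).data   # => [4.0, 6.0]
```

Because the `broadcasted` method already returns a plain value, the subsequent `materialize` step is a no-op, which mirrors how the `NDArray` methods above hand back an `NDArray` directly.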
""" -%(x::NDArray, y::Real) = _mod_scalar(x, scalar = y) - -broadcast_(::typeof(%), x::NDArray, y::Real) = _mod_scalar(x, y) -broadcast_(::typeof(%), y::Real, x::NDArray) = _rmod_scalar(x, y) +%(x::NDArray, y::Real) = _mod_scalar(x, y) -broadcast_(::typeof(%), x::NDArray{T,N}, y::NDArray{T,N}) where {T,N} = +broadcasted(::typeof(%), y::Real, x::NDArray) = _rmod_scalar(x, y) +broadcasted(::typeof(%), x::NDArray{T,N}, y::NDArray{T,N}) where {T,N} = _mod(x, y) -broadcast_(::typeof(%), x::NDArray{T,N}, y::NDArray{T,M}) where {T,N,M} = +broadcasted(::typeof(%), x::NDArray{T,N}, y::NDArray{T,M}) where {T,N,M} = _broadcast_mod(x, y) -import Base: ^ - # document of `.^` is merged into SymbolicNode's -broadcast_(::typeof(^), x::NDArray, s::Real) = _power_scalar(x, scalar = s) -broadcast_(::typeof(^), s::Real, x::NDArray) = _rpower_scalar(x, scalar = s) +broadcasted(::typeof(Base.literal_pow), ::typeof(^), x::NDArray, ::Val{s}) where {s} = + _power_scalar(x, scalar = s) +broadcasted(::typeof(^), x::NDArray, s::Real) = _power_scalar(x, scalar = s) +broadcasted(::typeof(^), s::Real, x::NDArray) = _rpower_scalar(x, scalar = s) -broadcast_(::typeof(^), ::Irrational{:e}, x::NDArray) = exp(x) -broadcast_(::typeof(^), x::NDArray, s::Irrational) = _power_scalar(x, scalar = s) -broadcast_(::typeof(^), s::Irrational, x::NDArray) = _rpower_scalar(x, scalar = s) +broadcasted(::typeof(^), ::Irrational{:ℯ}, x::NDArray) = exp(x) +broadcasted(::typeof(^), x::NDArray, s::Irrational) = _power_scalar(x, scalar = s) +broadcasted(::typeof(^), s::Irrational, x::NDArray) = _rpower_scalar(x, scalar = s) -broadcast_(::typeof(^), x::NDArray{T,N}, y::NDArray{T,N}) where {T,N} = +broadcasted(::typeof(^), x::NDArray{T,N}, y::NDArray{T,N}) where {T,N} = _power(x, y) -broadcast_(::typeof(^), x::NDArray{T,N}, y::NDArray{T,M}) where {T,N,M} = +broadcasted(::typeof(^), x::NDArray{T,N}, y::NDArray{T,M}) where {T,N,M} = _broadcast_power(x, y) ############################################################################### # comparison ############################################################################### -broadcast_(::typeof(==), x::NDArray{T}, y::NDArray{T}) where {T} = +broadcasted(::typeof(==), x::NDArray{T}, y::NDArray{T}) where {T} = _broadcast_equal(x, y) -broadcast_(::typeof(!=), x::NDArray{T}, y::NDArray{T}) where {T} = +broadcasted(::typeof(!=), x::NDArray{T}, y::NDArray{T}) where {T} = _broadcast_not_equal(x, y) -broadcast_(::typeof(>), x::NDArray{T}, y::NDArray{T}) where {T} = +broadcasted(::typeof(>), x::NDArray{T}, y::NDArray{T}) where {T} = _broadcast_greater(x, y) -broadcast_(::typeof(>=), x::NDArray{T}, y::NDArray{T}) where {T} = +broadcasted(::typeof(>=), x::NDArray{T}, y::NDArray{T}) where {T} = _broadcast_greater_equal(x, y) -broadcast_(::typeof(<), x::NDArray{T}, y::NDArray{T}) where {T} = +broadcasted(::typeof(<), x::NDArray{T}, y::NDArray{T}) where {T} = _broadcast_lesser(x, y) -broadcast_(::typeof(<=), x::NDArray{T}, y::NDArray{T}) where {T} = +broadcasted(::typeof(<=), x::NDArray{T}, y::NDArray{T}) where {T} = _broadcast_lesser_equal(x, y) @@ -862,10 +844,10 @@ broadcast_(::typeof(<=), x::NDArray{T}, y::NDArray{T}) where {T} = import Base: min, max -broadcast_(::typeof(max), x::NDArray{T}, y::NDArray{T}) where {T} = +broadcasted(::typeof(max), x::NDArray{T}, y::NDArray{T}) where {T} = _broadcast_maximum(x, y) -broadcast_(::typeof(min), x::NDArray{T}, y::NDArray{T}) where {T} = +broadcasted(::typeof(min), x::NDArray{T}, y::NDArray{T}) where {T} = _broadcast_minimum(x, y) """ @@ -894,7 +876,7 @@ fill(x, 
dims::Integer...) = fill(x, dims) import Base: hypot -broadcast_(::typeof(hypot), x::NDArray{T}, y::NDArray{T}) where {T} = +broadcasted(::typeof(hypot), x::NDArray{T}, y::NDArray{T}) where {T} = _broadcast_hypot(x, y) """ @@ -985,7 +967,7 @@ macro nd_as_jl(m_args...) wait_statements = Expr(:block, [:(_wait_to_read($v)) for v in nd_ro]..., [:(_wait_to_write($v)) for v in nd_rw]...) clear_statements = Expr(:block, [:($v_orig = nothing) for v_orig in rw_origs]...) - let_assignments = [:($v = try_get_shared($v)) for v in nd_all] + let_assignments = Expr(:block, [:($v = try_get_shared($v)) for v in nd_all]...) sync_statements = map(rw_origs, nd_rw) do v_orig, v quote if !is_shared($v, $v_orig) @@ -996,10 +978,10 @@ macro nd_as_jl(m_args...) end sync_statements = Expr(:block, sync_statements...) - let_statement = Expr(:let, quote + let_statement = Expr(:let, let_assignments, quote $stmts $sync_statements - end, let_assignments...) + end) m_body = quote $wait_statements $save_statements @@ -1015,8 +997,8 @@ end # pointers from CPU) leads to undefined behavior. import Base.pointer function pointer(arr :: NDArray) - pdata = Ref{Ptr{Void}}(0) - @mxcall(:MXNDArrayGetData, (MX_handle, Ref{Ptr{Void}}), arr, pdata) + pdata = Ref{Ptr{Cvoid}}(0) + @mxcall(:MXNDArrayGetData, (MX_handle, Ref{Ptr{Cvoid}}), arr, pdata) return convert(Ptr{eltype(arr)}, pdata[]) end @@ -1146,18 +1128,6 @@ end const _ndsig = Dict{Symbol,Expr}() const _nddoc = Dict{Symbol,Any}() -function _autoimport(name::Symbol, sig::Expr) - if name == :broadcast_ - name = _broadcast_target(sig) - end - - if isdefined(Base, name) - :(import Base: $name) - else - :() - end -end - _isinplace(name::Symbol) = endswith(string(name), "!") _writable(name::Symbol, x) = @@ -1178,7 +1148,7 @@ _broadcast_target(sig::Expr) = sig.args[2].args[].args[end] Generate docstring from function signature """ function _docsig(fname::Symbol, sig::Expr, opname::String) - if fname !== :broadcast_ + if fname !== :broadcasted get(_nddoc, fname, " $sig") * "\n" * _getdocdefine(opname) else name = _broadcast_target(sig) @@ -1205,11 +1175,12 @@ function _docsig(fname::Symbol, sig::Expr, opname::String) end end + macro _remap(sig::Expr, imp::Expr) - fname = (sig.head == :call) ? 
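The `@nd_as_jl` change above tracks a parser-level change: in Julia 0.7+ the `let` AST takes the bindings first, as a single block, and the body second. A standalone check of that layout (not package code):

```julia
bindings = Expr(:block, :(a = 1), :(b = 2))
body     = :(a + b)
ex       = Expr(:let, bindings, body)   # bindings block first, body second

Meta.show_sexpr(ex)   # prints the s-expression form of the `let`
eval(ex)              # => 3
```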
sig.args[1] : sig.args[1].args[1] # case of `where` - opname = string(imp.args[1]) + d = splitdef(:($sig = $imp)) + @capture d[:name] (M_.fname_|fname_) - import_expr = _autoimport(fname, sig) + opname = string(imp.args[1]) if isa(imp.args[2], Expr) && imp.args[2].head == :parameters ndin = imp.args[3:end] @@ -1257,8 +1228,7 @@ macro _remap(sig::Expr, imp::Expr) func_def = Expr(:function, sig, func_body) esc(quote - $import_expr - @doc $docstr -> + @doc $docstr $func_def end) end @@ -1271,32 +1241,37 @@ macro _remap(sig::Expr, imp::Symbol) end) end -_ndsig[:reshape] = :(reshape(arr; shape = dim, reverse = !reverse)) -@_remap reshape(arr::NDArray, dim...; reverse = false) reshape -@_remap reshape(arr::NDArray, dim; reverse = false) reshape +_ndsig[:reshape] = :(reshape(x; shape = dim, reverse = !reverse)) +@_remap Base.reshape(x::NDArray, dim...; reverse = false) reshape +@_remap Base.reshape(x::NDArray, dim ; reverse = false) reshape -@_remap mean(arr::NDArray) mean(arr) -@_remap mean(arr::NDArray, region) mean(arr; axis = 0 .- region, keepdims = true) +Statistics.mean(x::NDArray; dims = :) = _mean(x, dims) +@_remap _mean(x::NDArray, ::Colon) mean(x) +@_remap _mean(x::NDArray, dims) mean(x; axis = 0 .- dims, keepdims = true) -@_remap sum(arr::NDArray) sum(arr) -@_remap sum(arr::NDArray, dims) sum(arr; axis = 0 .- dims, keepdims = true) +Base.sum(x::NDArray; dims = :) = _sum(x, dims) +@_remap _sum(x::NDArray, ::Colon) sum(x) +@_remap _sum(x::NDArray, dims) sum(x; axis = 0 .- dims, keepdims = true) -@_remap maximum(arr::NDArray) max(arr) -@_remap maximum(arr::NDArray, dims) max(arr; axis = 0 .- dims, keepdims = true) +Base.maximum(x::NDArray; dims = :) = _nd_maximum(x, dims) +@_remap _nd_maximum(x::NDArray, ::Colon) max(x) +@_remap _nd_maximum(x::NDArray, dims) max(x; axis = 0 .- dims, keepdims = true) -@_remap minimum(arr::NDArray) min(arr) -@_remap minimum(arr::NDArray, dims) min(arr; axis = 0 .- dims, keepdims = true) +Base.minimum(x::NDArray; dims = :) = _nd_minimum(x, dims) +@_remap _nd_minimum(x::NDArray, ::Colon) min(x) +@_remap _nd_minimum(x::NDArray, dims) min(x; axis = 0 .- dims, keepdims = true) # See https://github.com/dmlc/MXNet.jl/issues/55 -@_remap dot(x::NDArray, y::NDArray) dot(y, x) +@_remap LinearAlgebra.dot(x::NDArray, y::NDArray) dot(y, x) # See https://github.com/dmlc/MXNet.jl/pull/123 -@_remap transpose(arr::NDArray{T,1}) where T reshape(arr; shape = (1, length(arr)), reverse = true) -@_remap transpose(arr::NDArray{T,2}) where T transpose(arr) -@_remap permutedims(arr::NDArray, axes) transpose(arr; axes = length(axes) .- tuple(axes...)) +@_remap Base.transpose(x::NDArray{T,1}) where T reshape(x; shape = (1, length(x)), reverse = true) +@_remap Base.transpose(x::NDArray{T,2}) where T transpose(x) +@_remap Base.permutedims(x::NDArray, axes) transpose(x; axes = length(axes) .- tuple(axes...)) -@_remap prod(arr::NDArray) prod(arr) -@_remap prod(arr::NDArray, dims) prod(arr; axis = 0 .- dims, keepdims = true) +Base.prod(x::NDArray; dims = :) = _prod(x, dims) +@_remap _prod(x::NDArray, ::Colon) prod(x) +@_remap _prod(x::NDArray, dims) prod(x; axis = 0 .- dims, keepdims = true) _nddoc[:clip] = _nddoc[:clip!] 
= """ @@ -1362,23 +1337,23 @@ julia> mx.expand_dims(x, 2) @_remap expand_dims(x::NDArray, dim) expand_dims(x; axis = -dim) # trigonometric functions, remap to keep consistent with Base -@_remap broadcast_(::typeof(sin), x::NDArray) sin(x) -@_remap broadcast_(::typeof(cos), x::NDArray) cos(x) -@_remap broadcast_(::typeof(tan), x::NDArray) tan(x) -@_remap broadcast_(::typeof(asin), x::NDArray) arcsin(x) -@_remap broadcast_(::typeof(acos), x::NDArray) arccos(x) -@_remap broadcast_(::typeof(atan), x::NDArray) arctan(x) +@_remap broadcasted(::typeof(sin), x::NDArray) sin(x) +@_remap broadcasted(::typeof(cos), x::NDArray) cos(x) +@_remap broadcasted(::typeof(tan), x::NDArray) tan(x) +@_remap broadcasted(::typeof(asin), x::NDArray) arcsin(x) +@_remap broadcasted(::typeof(acos), x::NDArray) arccos(x) +@_remap broadcasted(::typeof(atan), x::NDArray) arctan(x) # hyperbolic funcs, remap to keep consistent with Base -@_remap broadcast_(::typeof(sinh), x::NDArray) sinh(x) -@_remap broadcast_(::typeof(cosh), x::NDArray) cosh(x) -@_remap broadcast_(::typeof(tanh), x::NDArray) tanh(x) -@_remap broadcast_(::typeof(asinh), x::NDArray) arcsinh(x) -@_remap broadcast_(::typeof(acosh), x::NDArray) arccosh(x) -@_remap broadcast_(::typeof(atanh), x::NDArray) arctanh(x) +@_remap broadcasted(::typeof(sinh), x::NDArray) sinh(x) +@_remap broadcasted(::typeof(cosh), x::NDArray) cosh(x) +@_remap broadcasted(::typeof(tanh), x::NDArray) tanh(x) +@_remap broadcasted(::typeof(asinh), x::NDArray) arcsinh(x) +@_remap broadcasted(::typeof(acosh), x::NDArray) arccosh(x) +@_remap broadcasted(::typeof(atanh), x::NDArray) arctanh(x) # activation functions -_nddoc[:σ] = _nddoc[:sigmoid] = doc""" +@doc doc""" σ.(x::NDArray) sigmoid.(x::NDArray) @@ -1390,10 +1365,12 @@ Computes sigmoid of x element-wise. The storage type of `sigmoid` output is always dense. """ -@_remap broadcast_(::typeof(σ), x::NDArray) sigmoid(x) -@_remap broadcast_(::typeof(sigmoid), x::NDArray) sigmoid(x) +function σ end +const sigmoid = σ +_nddoc[:σ] = false +@_remap broadcasted(::typeof(σ), x::NDArray) sigmoid(x) -_nddoc[:relu] = doc""" +@doc doc""" relu.(x::NDArray) Computes rectified linear. @@ -1402,9 +1379,11 @@ Computes rectified linear. \max(x, 0) ``` """ -@_remap broadcast_(::typeof(relu), x::NDArray) relu(x) +function relu end +_nddoc[:relu] = false +@_remap broadcasted(::typeof(relu), x::NDArray) relu(x) -_nddoc[:softmax] = doc""" +@doc doc""" softmax.(x::NDArray, [dim = ndims(x)]) Applies the softmax function. @@ -1416,10 +1395,12 @@ and the elements along the given axis sum up to 1. softmax(\mathbf{z})_j = \frac{e^{z_j}}{\sum_{k=1}^K e^{z_k}} ``` """ -@_remap broadcast_(::typeof(softmax), x::NDArray) softmax(x; axis = -ndims(x)) -@_remap broadcast_(::typeof(softmax), x::NDArray, dim::Int) softmax(x; axis = -dim) +function softmax end +_nddoc[:softmax] = false +@_remap broadcasted(::typeof(softmax), x::NDArray) softmax(x; axis = -ndims(x)) +@_remap broadcasted(::typeof(softmax), x::NDArray, dim::Int) softmax(x; axis = -dim) -_nddoc[:log_softmax] = """ +""" log_softmax.(x::NDArray, [dim = ndims(x)]) Computes the log softmax of the input. 
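The activation helpers above (`σ`, `relu`, `softmax`, `log_softmax`) are declared as empty generic functions and then given `broadcasted` methods, so the dotted form such as `relu.(x)` is what dispatches into libmxnet. A standalone sketch of that pattern with plain arrays (`toy_relu` is illustrative, not the package implementation):

```julia
import Base.Broadcast: broadcasted

function toy_relu end                                    # generic function with zero methods
broadcasted(::typeof(toy_relu), x::AbstractArray) = max.(x, 0)

toy_relu.([-1.0, 2.0, -3.0])   # => [0.0, 2.0, 0.0]
```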
@@ -1435,8 +1416,10 @@ julia> mx.log_softmax.(x) -1.41703 -0.41703 -2.31703 -2.31703 -0.41703 -1.41703 """ -@_remap broadcast_(::typeof(log_softmax), x::NDArray) log_softmax(x; axis = -ndims(x)) -@_remap broadcast_(::typeof(log_softmax), x::NDArray, dim::Int) log_softmax(x; axis = -dim) +function log_softmax end +_nddoc[:log_softmax] = false +@_remap broadcasted(::typeof(log_softmax), x::NDArray) log_softmax(x; axis = -ndims(x)) +@_remap broadcasted(::typeof(log_softmax), x::NDArray, dim::Int) log_softmax(x; axis = -dim) ################################################################################ # remapping to solving type unstablility @@ -1570,7 +1553,7 @@ julia> mx.broadcast_axis(x, 3, 2) """ @_remap(broadcast_axis(x::NDArray, dim, size), broadcast_axis(x; axis = ndims(x) .- dim, size = size)) -@_remap(broadcast_axes(x::NDArray, dim, size), +@_remap(Base.broadcast_axes(x::NDArray, dim, size), broadcast_axes(x; axis = ndims(x) .- dim, size = size)) ################################################################################ @@ -1619,7 +1602,7 @@ Upon calling, the output arguments will be automatically initialized with empty Those functions always return the output arguments. If there is only one output (the typical situation), that object (`NDArray`) is returned. Otherwise, a tuple containing all the outputs will be returned. """ -function _get_ndarray_function_def(name :: String) +function _get_ndarray_function_def(name::String) func_name = Symbol(name) func_def = quote @@ -1763,11 +1746,13 @@ macro _import_ndarray_functions() func_def, func_def2 = _get_ndarray_function_def(name) func_name = Symbol(name) - expr = quote - # TODO the explicit exclusion of take will no longer be necessary when it is removed from Base - $((isdefined(Base, func_name) && func_name ≠ :take) ? :(import Base.$func_name) : :()) + + import_expr = _import_expr(func_name) + + quote + $import_expr $func_def - @doc $desc -> + @doc $desc $func_def2 end end @@ -1777,4 +1762,4 @@ macro _import_ndarray_functions() end) end -@_import_ndarray_functions() +@_import_ndarray_functions diff --git a/julia/src/nn-factory.jl b/julia/src/nn-factory.jl index b5134f9d2bd5..cb5df3722470 100644 --- a/julia/src/nn-factory.jl +++ b/julia/src/nn-factory.jl @@ -38,7 +38,7 @@ fully connected layers. Returns the constructed MLP. """ -function MLP(input, spec; hidden_activation::Symbol=:relu, prefix=gensym()) +function MLP(input, spec; hidden_activation::Symbol = :relu, prefix = gensym()) spec = convert(Vector{Union{Int,Tuple}}, spec) n_layer = length(spec) diff --git a/julia/src/optimizer.jl b/julia/src/optimizer.jl index 6436878df678..46726500f81f 100644 --- a/julia/src/optimizer.jl +++ b/julia/src/optimizer.jl @@ -73,6 +73,8 @@ OptimizationState(batch_size::Int) = OptimizationState(batch_size, 0, 0, 0) module LearningRate +using Markdown + import Base: get import ..mx: AbstractLearningRateScheduler, OptimizationState, update! @@ -100,7 +102,7 @@ end get(f::Fixed) = f.η -doc""" +@doc doc""" LearningRate.Exp(η₀; γ = 0.9) ```math @@ -122,7 +124,7 @@ end get(a::Exp) = a.η₀ * a.γ^a.t -doc""" +@doc doc""" LearningRate.Inv(η₀; γ = 0.9, p = 0.5) ```math @@ -156,6 +158,8 @@ using .LearningRate module Momentum +using Markdown + import Base: get import ..mx: AbstractMomentumScheduler, OptimizationState @@ -194,7 +198,7 @@ end get(f::Fixed) = f.μ -doc""" +@doc doc""" NadamScheduler(; μ = 0.99, δ = 0.004, γ = 0.5, α = 0.96) Nesterov-accelerated adaptive momentum scheduler. 
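In Julia 0.7+ the `doc"..."` string macro lives in the Markdown standard library, and the hunks above and below attach such docstrings explicitly with `@doc`, hence the added `using Markdown`. A tiny standalone version of that pattern (`halve` is a made-up example function):

```julia
using Markdown

halve(x) = x / 2

@doc doc"Return ``x / 2``." halve   # attach a Markdown docstring to an existing binding
@doc halve                          # retrieve it again
```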
diff --git a/julia/src/optimizers/adadelta.jl b/julia/src/optimizers/adadelta.jl index 2b0cd0060261..7a45dd0459db 100644 --- a/julia/src/optimizers/adadelta.jl +++ b/julia/src/optimizers/adadelta.jl @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -doc""" +@doc doc""" AdaDelta(; kwargs...) Scale learning rates by the ratio of accumulated gradients to accumulated diff --git a/julia/src/optimizers/adagrad.jl b/julia/src/optimizers/adagrad.jl index 4236cb8cda20..6ddcb36280bc 100644 --- a/julia/src/optimizers/adagrad.jl +++ b/julia/src/optimizers/adagrad.jl @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -doc""" +@doc doc""" AdaGrad(; kwargs...) Scale learning rates by dividing with the square root of accumulated diff --git a/julia/src/optimizers/nadam.jl b/julia/src/optimizers/nadam.jl index fdcd1ea7e7ab..522e9194caa8 100644 --- a/julia/src/optimizers/nadam.jl +++ b/julia/src/optimizers/nadam.jl @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -doc""" +@doc doc""" Nadam(; kwargs...) Nesterov Adam optimizer: Adam RMSprop with Nesterov momentum, diff --git a/julia/src/optimizers/rmsprop.jl b/julia/src/optimizers/rmsprop.jl index 8351142681b1..18445752588a 100644 --- a/julia/src/optimizers/rmsprop.jl +++ b/julia/src/optimizers/rmsprop.jl @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -doc""" +@doc doc""" RMSProp(; kwargs...) Scale learning rates by dividing with the moving average of the root mean diff --git a/julia/src/optimizers/sgd.jl b/julia/src/optimizers/sgd.jl index dfd3d954baa3..6af8094829f6 100644 --- a/julia/src/optimizers/sgd.jl +++ b/julia/src/optimizers/sgd.jl @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -doc""" +@doc doc""" SGD(; kwargs...) Stochastic gradient descent optimizer. @@ -69,7 +69,7 @@ SGD create_state(sgd::SGD, ::Int, W::NDArray) = isa(sgd.μ_sched, Momentum.Null) ? nothing : zeros(size(W), context(W)) -function update!(sgd::SGD, ::Int, W::NDArray, ∇::NDArray, ::Void) +function update!(sgd::SGD, ::Int, W::NDArray, ∇::NDArray, ::Nothing) η = get(sgd.η_sched) normgrad!(sgd, W, ∇) @inplace W += -η * ∇ diff --git a/julia/src/random.jl b/julia/src/random.jl index b9b32a42ecff..e18e906a5307 100644 --- a/julia/src/random.jl +++ b/julia/src/random.jl @@ -76,8 +76,13 @@ randn(dims::Int...; μ = 0, σ = 1, context = cpu()) = randn!(empty(dims, context), μ = μ, σ = σ) """ - srand(seed::Int) + seed!(seed::Int) Set the random seed of libmxnet """ -srand(seed_state::Int) = @mxcall(:MXRandomSeed, (Cint,), seed_state) +seed!(s::Int) = @mxcall :MXRandomSeed (Cint,) s + +function srand(s::Int) + @warn "`mx.srand` is deprecated, use `mx.seed!` instead." + seed!(s) +end diff --git a/julia/src/symbolic-node.jl b/julia/src/symbolic-node.jl index 508f9d449028..8b7a8be0999d 100644 --- a/julia/src/symbolic-node.jl +++ b/julia/src/symbolic-node.jl @@ -19,20 +19,21 @@ SymbolicNode SymbolicNode is the basic building block of the symbolic graph in MXNet.jl. +It's a callable object and supports following calls: - (self :: SymbolicNode)(args :: SymbolicNode...) - (self :: SymbolicNode)(; kwargs...) + (s::SymbolicNode)(args::SymbolicNode...) + (s::SymbolicNode)(; kwargs...) -Make a new node by composing `self` with `args`. Or the arguments +Make a new node by composing `s` with `args`. Or the arguments can be specified using keyword arguments. 
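The random.jl rename above mirrors the Base change: Julia's own `srand` became `Random.seed!` in 0.7, with `Random` now an explicit standard-library import, and MXNet.jl renames its libmxnet seeding helper to match while keeping a deprecation shim. A standalone illustration of the Base side only (this seeds Julia's RNG, not libmxnet):

```julia
using Random

Random.seed!(42)
x = rand(3)

Random.seed!(42)
x == rand(3)   # => true, the same seed reproduces the same stream
```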
""" mutable struct SymbolicNode handle::MX_SymbolHandle end -const SymbolicNodeOrReal = Union{SymbolicNode, Real} +const SymbolicNodeOrReal = Union{SymbolicNode,Real} -@unfuse SymbolicNode # for broadcasting +# @unfuse SymbolicNode # for broadcasting Base.unsafe_convert(::Type{MX_handle}, obj::SymbolicNode) = Base.unsafe_convert(MX_handle, obj.handle) @@ -40,31 +41,31 @@ Base.convert(t::Type{MX_handle}, obj::SymbolicNode) = Base.unsafe_convert(t, obj Base.cconvert(t::Type{MX_handle}, obj::SymbolicNode) = Base.unsafe_convert(t, obj) """ - deepcopy(self :: SymbolicNode) + deepcopy(s::SymbolicNode) Make a deep copy of a SymbolicNode. """ -function Base.deepcopy(self :: SymbolicNode) - ref_hdr = Ref{MX_handle}(0) - @mxcall(:MXSymbolCopy, (MX_handle, Ref{MX_handle}), self, ref_hdr) - return SymbolicNode(MX_SymbolHandle(ref_hdr[])) +function Base.deepcopy(s::SymbolicNode) + ref_hdr = Ref{MX_handle}(C_NULL) + @mxcall(:MXSymbolCopy, (MX_handle, Ref{MX_handle}), s, ref_hdr) + SymbolicNode(MX_SymbolHandle(ref_hdr[])) end """ - copy(self :: SymbolicNode) + copy(s::SymbolicNode) Make a copy of a SymbolicNode. The same as making a deep copy. """ -function Base.copy(self :: SymbolicNode) - Base.deepcopy(self) -end +Base.copy(s::SymbolicNode) = Base.deepcopy(s) -function (self::SymbolicNode)(args :: SymbolicNode...) - s = deepcopy(self) + +function (s::SymbolicNode)(args::SymbolicNode...) + s = deepcopy(s) _compose!(s, args...) end -function (self::SymbolicNode)(;kwargs...) - s = deepcopy(self) + +function (s::SymbolicNode)(; kwargs...) + s = deepcopy(s) _compose!(s; kwargs...) end @@ -82,7 +83,7 @@ macro _list_symbol_info(self, func_name) end """ - list_arguments(self :: SymbolicNode) + list_arguments(s::SymbolicNode) List all the arguments of this node. The argument for a node contains both the inputs and parameters. For example, a `FullyConnected` node will @@ -91,24 +92,20 @@ list all the arguments for intermediate nodes. Returns a list of symbols indicating the names of the arguments. """ -function list_arguments(self :: SymbolicNode) - @_list_symbol_info(self, :MXSymbolListArguments) -end +list_arguments(s::SymbolicNode) = @_list_symbol_info(s, :MXSymbolListArguments) """ - list_outputs(self :: SymbolicNode) + list_outputs(s::SymbolicNode) List all the outputs of this node. Returns a list of symbols indicating the names of the outputs. """ -function list_outputs(self :: SymbolicNode) - @_list_symbol_info(self, :MXSymbolListOutputs) -end +list_outputs(s::SymbolicNode) = @_list_symbol_info(s, :MXSymbolListOutputs) """ - list_auxiliary_states(self :: SymbolicNode) + list_auxiliary_states(s::SymbolicNode) List all auxiliary states in the symbool. @@ -120,19 +117,18 @@ Most operators do not have Auxiliary states. Returns a list of symbols indicating the names of the auxiliary states. """ -function list_auxiliary_states(self :: SymbolicNode) - @_list_symbol_info(self, :MXSymbolListAuxiliaryStates) -end +list_auxiliary_states(s::SymbolicNode) = + @_list_symbol_info(s, :MXSymbolListAuxiliaryStates) """ - get_internals(self :: SymbolicNode) + get_internals(s::SymbolicNode) Get a new grouped `SymbolicNode` whose output contains all the internal outputs of this `SymbolicNode`. 
""" -function get_internals(self :: SymbolicNode) +function get_internals(s::SymbolicNode) ref_hdr = Ref{MX_handle}(0) - @mxcall(:MXSymbolGetInternals, (MX_handle, Ref{MX_handle}), self, ref_hdr) + @mxcall(:MXSymbolGetInternals, (MX_handle, Ref{MX_handle}), s, ref_hdr) return SymbolicNode(MX_SymbolHandle(ref_hdr[])) end @@ -166,37 +162,38 @@ function get_children(x::SymbolicNode) end """ - get_attr(self :: SymbolicNode, key :: Symbol) + get_attr(s::SymbolicNode, key::Symbol) Get attribute attached to this `SymbolicNode` belonging to key. -Returns the value belonging to key as a `Nullable`. +Returns the value belonging to key as a `String`. +If not available, returns `missing`. """ -function get_attr(self :: SymbolicNode, key :: Symbol) +function get_attr(s::SymbolicNode, key::Symbol) key_s = string(key) ref_out = Ref{Cstring}() ref_success = Ref{Cint}(-1) @mxcall(:MXSymbolGetAttr, (MX_handle, Cstring, Ref{Cstring}, Ref{Cint}), - self, key_s, ref_out, ref_success) + s, key_s, ref_out, ref_success) if ref_success[] == 1 - return Nullable{String}(unsafe_string(ref_out[])) + unsafe_string(ref_out[]) else - return Nullable{String}() + missing end end """ - list_attr(self :: SymbolicNode) + list_attr(s::SymbolicNode) Get all attributes from a symbol. Returns a dictionary of attributes. """ -function list_attr(self :: SymbolicNode) +function list_attr(s::SymbolicNode) ref_sz = Ref{MX_uint}(0) ref_strings = Ref{char_pp}(0) @mxcall(:MXSymbolListAttrShallow, (MX_handle, Ref{MX_uint}, Ref{char_pp}), - self, ref_sz, ref_strings) + s, ref_sz, ref_strings) narg = 2*ref_sz[] strings = unsafe_wrap(Array, ref_strings[], narg) out = Dict{Symbol, String}() @@ -209,17 +206,17 @@ function list_attr(self :: SymbolicNode) end """ - list_all_attr(self :: SymbolicNode) + list_all_attr(s::SymbolicNode) Get all attributes from the symbol graph. Returns a dictionary of attributes. """ -function list_all_attr(self :: SymbolicNode) +function list_all_attr(s::SymbolicNode) ref_sz = Ref{MX_uint}(0) ref_strings = Ref{char_pp}(0) @mxcall(:MXSymbolListAttr, (MX_handle, Ref{MX_uint}, Ref{char_pp}), - self, ref_sz, ref_strings) + s, ref_sz, ref_strings) narg = 2*ref_sz[] strings = unsafe_wrap(Array, ref_strings[], narg) out = Dict{Symbol, String}() @@ -232,7 +229,7 @@ function list_all_attr(self :: SymbolicNode) end """ - set_attr(self:: SymbolicNode, key :: Symbol, value :: AbstractString) + set_attr(s::SymbolicNode, key::Symbol, value::AbstractString) Set the attribute key to value for this `SymbolicNode`. @@ -242,15 +239,15 @@ Set the attribute key to value for this `SymbolicNode`. the attributes of a `SymbolicNode` that is already been used somewhere else might cause unexpected behavior and inconsistency. """ -function set_attr(self :: SymbolicNode, key :: Symbol, value :: AbstractString) +function set_attr(s::SymbolicNode, key::Symbol, value::AbstractString) key_s = string(key) value_s = String(value) - @mxcall(:MXSymbolSetAttr, (MX_handle, Cstring, Cstring), self, key_s, value_s) + @mxcall(:MXSymbolSetAttr, (MX_handle, Cstring, Cstring), s, key_s, value_s) end """ - get_name(self :: SymbolicNode) + get_name(s::SymbolicNode) Get the name of the symbol. @@ -262,15 +259,15 @@ Get the name of the symbol. 
julia> mx.get_name(y) :fullyconnected0 """ -function get_name(self :: mx.SymbolicNode) - name = Ref{mx.char_p}(0) +function get_name(s::mx.SymbolicNode) + name = Ref{mx.char_p}(C_NULL) success = Ref(0) - @mxcall(:MXSymbolGetName, (MX_handle, Ref{char_p}, Ref{Int}), self.handle.value, name, success) + @mxcall(:MXSymbolGetName, (MX_handle, Ref{char_p}, Ref{Int}), s.handle.value, name, success) @assert success[] != -1 str = name[] if str == C_NULL # e.g. the symbol returned via get_internals - string(self.handle.value) + string(s.handle.value) else Symbol(unsafe_string(str)) end @@ -279,16 +276,6 @@ end Base.show(io::IO, sym::SymbolicNode) = print(io, "$(typeof(sym)) $(get_name(sym))") -import Base: print - -function print(io::IO, sym::SymbolicNode) - out = Ref{mx.char_p}(C_NULL) - @mx.mxcall(:MXSymbolPrint, (mx.MX_SymbolHandle, Ref{mx.char_p}), sym.handle, out) - print(io, unsafe_string(out[])) -end - -print(sym::SymbolicNode) = print(STDOUT, sym) - """ print([io::IO], sym::SymbolicNode) @@ -298,7 +285,7 @@ Print the content of symbol, used for debug. julia> layer = @mx.chain mx.Variable(:data) => mx.FullyConnected(name=:fc1, num_hidden=128) => mx.Activation(name=:relu1, act_type=:relu) -MXNet.mx.SymbolicNode(MXNet.mx.MX_SymbolHandle(Ptr{Void} @0x000055b29b9c3520)) +MXNet.mx.SymbolicNode(MXNet.mx.MX_SymbolHandle(Ptr{Nothing} @0x000055b29b9c3520)) julia> print(layer) Symbol Outputs: @@ -322,25 +309,32 @@ Attrs: act_type=relu ``` """ -print +function Base.print(io::IO, sym::SymbolicNode) + out = Ref{mx.char_p}(C_NULL) + @mx.mxcall(:MXSymbolPrint, (mx.MX_SymbolHandle, Ref{mx.char_p}), sym.handle, out) + print(io, unsafe_string(out[])) +end + +Base.print(sym::SymbolicNode) = print(STDOUT, sym) """ - grad(self :: SymbolicNode, wrt :: Vector{SymbolicNode}) + grad(s::SymbolicNode, wrt::Vector{Symbol}) Get the autodiff gradient of the current `SymbolicNode`. This function can only be used if the current symbol is a loss function. # Arguments: -* `self::SymbolicNode`: current node. +* `s::SymbolicNode`: current node. * `wrt::Vector{Symbol}`: the names of the arguments to the gradient. Returns a gradient symbol of the corresponding gradient. """ -function grad(self :: SymbolicNode, wrt :: Vector{Symbol}) - hdr_ref = Ref{MX_handle}(0) - keys = String[string(key) for key in wrt] +function grad(s::SymbolicNode, wrt::Vector{Symbol}) + hdr_ref = Ref{MX_handle}(C_NULL) + keys = string.(key) - @mxcall(:MXSymbolGrad, (MX_handle, MX_uint, char_pp, Ptr{MX_handle}), self, length(keys), keys, hdr_ref) + @mxcall(:MXSymbolGrad, (MX_handle, MX_uint, char_pp, Ptr{MX_handle}), + self, length(keys), keys, hdr_ref) return SymbolicNode(MX_SymbolHandle(hdr_ref[])) end @@ -403,7 +397,7 @@ function _build_shapes(shape_size::MX_uint, shape_ndim::Ptr{MX_uint}, shape_data shape_data = unsafe_wrap(Array, shape_data, shape_size) shapes = map(1:shape_size) do i my_shape = unsafe_wrap(Array, shape_data[i], shape_ndim[i]) - tuple(flipdim(Int[my_shape...],1)...) + tuple(reverse(Int[my_shape...], dims = 1)...) end convert(Vector{Tuple}, shapes) end @@ -458,18 +452,18 @@ function infer_shape(self :: SymbolicNode; kwargs...) sdata = MX_uint[] indptr = MX_uint[0] for (k,v) in kwargs - append!(sdata, flipdim([v...],1)) + append!(sdata, reverse([v...], dims = 1)) push!(indptr, length(sdata)) end keys = AbstractString[string(x[1]) for x in kwargs] _infer_shape(self, keys, indptr, sdata) end -function infer_shape(self :: SymbolicNode, args :: Union{Tuple, Void}...) +function infer_shape(self :: SymbolicNode, args::Union{Tuple, Cvoid}...) 
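`Nullable` is gone in Julia 0.7+, so `get_attr` above returns the attribute string directly, or `missing` when the key is absent. A standalone sketch of that convention with a plain `Dict` (attribute names are made up):

```julia
function lookup_attr(attrs::Dict{Symbol,String}, key::Symbol)
    haskey(attrs, key) ? attrs[key] : missing
end

attrs = Dict(:act_type => "relu")
lookup_attr(attrs, :act_type)              # => "relu"
ismissing(lookup_attr(attrs, :momentum))   # => true
```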
sdata = MX_uint[] indptr = MX_uint[0] for arg in args - if isa(arg, Void); continue; end - append!(sdata, flipdim([arg...],1)) + if isa(arg, Cvoid); continue; end + append!(sdata, reverse([arg...], dims = 1)) push!(indptr, length(sdata)) end keys = Ptr{char_p}(0) @@ -528,12 +522,12 @@ function infer_type(self :: SymbolicNode; kwargs...) _infer_type(self, keys, types) end -function infer_type(self :: SymbolicNode, args :: Union{Tuple, Void}...) +function infer_type(self :: SymbolicNode, args :: Union{Tuple,Cvoid}...) types = Cint[] keys = Ptr{char_p}(0) for arg in args - if isa(arg, Void); continue; end + if isa(arg, Cvoid); continue; end push!(types, toTypeFlag(arg)) end _infer_type(self, keys, types) @@ -548,7 +542,7 @@ indicating the index, as in the list of [`list_outputs`](@ref). """ function Base.getindex(self :: SymbolicNode, idx :: Union{Base.Symbol, AbstractString}) idx = Symbol(idx) - i_idx = find(idx .== list_outputs(self)) + i_idx = findall(idx .== list_outputs(self)) @assert(length(i_idx) > 0, "Cannot find output with name '$idx'") @assert(length(i_idx) < 2, "Found duplicated output with name '$idx'") Base.getindex(self, i_idx[1]) @@ -582,8 +576,8 @@ end +(s::Real, x::SymbolicNode, ys::SymbolicNodeOrReal...) = +(x + s, ys...) -broadcast_(::typeof(+), x::SymbolicNode, ys::SymbolicNodeOrReal...) = +(x, ys...) -broadcast_(::typeof(+), s::Real, x::SymbolicNode, ys::SymbolicNodeOrReal...) = +(x + s, ys...) +broadcasted(::typeof(+), x::SymbolicNode, ys::SymbolicNodeOrReal...) = +(x, ys...) +broadcasted(::typeof(+), s::Real, x::SymbolicNode, ys::SymbolicNodeOrReal...) = +(x + s, ys...) import Base: - @@ -600,8 +594,8 @@ s::Real - x::SymbolicNode = _rminus_scalar(x, scalar=MX_float(s)) -(x::SymbolicNode) = 0 - x -broadcast_(::typeof(-), x::SymbolicNode, y::SymbolicNodeOrReal) = x - y -broadcast_(::typeof(-), s::Real, x::SymbolicNode) = s - x +broadcasted(::typeof(-), x::SymbolicNode, y::SymbolicNodeOrReal) = x - y +broadcasted(::typeof(-), s::Real, x::SymbolicNode) = s - x import Base: * @@ -613,7 +607,7 @@ Elementwise multiplication of `SymbolicNode`. x::SymbolicNode * s::Real = _mul_scalar(x, scalar=MX_float(s)) s::Real * x::SymbolicNode = _mul_scalar(x, scalar=MX_float(s)) -function broadcast_(::typeof(*), x::SymbolicNode, ys::SymbolicNodeOrReal...) +function broadcasted(::typeof(*), x::SymbolicNode, ys::SymbolicNodeOrReal...) ret = x for y in ys if y isa SymbolicNode @@ -625,8 +619,8 @@ function broadcast_(::typeof(*), x::SymbolicNode, ys::SymbolicNodeOrReal...) ret end -broadcast_(::typeof(*), s::Real, x::SymbolicNode, ys::SymbolicNodeOrReal...) = - broadcast_(*, x * s, ys...) +broadcasted(::typeof(*), s::Real, x::SymbolicNode, ys::SymbolicNodeOrReal...) = + broadcasted(*, x * s, ys...) import Base: / @@ -642,9 +636,9 @@ of the same shape. """ x::SymbolicNode / s::Real = _DivScalar(x, scalar=MX_float(s)) -broadcast_(::typeof(/), x::SymbolicNode, y::SymbolicNode) = _div(x, y) -broadcast_(::typeof(/), x::SymbolicNode, s::Real) = _div_scalar(x, scalar=MX_float(s)) -broadcast_(::typeof(/), s::Real, x::SymbolicNode) = _rdiv_scalar(x, scalar=MX_float(s)) +broadcasted(::typeof(/), x::SymbolicNode, y::SymbolicNode) = _div(x, y) +broadcasted(::typeof(/), x::SymbolicNode, s::Real) = _div_scalar(x, scalar=MX_float(s)) +broadcasted(::typeof(/), s::Real, x::SymbolicNode) = _rdiv_scalar(x, scalar=MX_float(s)) import Base: ^ @@ -657,22 +651,24 @@ Operating with `Real` is available. 
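The `getindex` method above uses `findall`, the Julia 0.7 replacement for `find`, to locate a named output; a `Symbol` broadcasts as a scalar, so `idx .== outputs` compares it against every element. Standalone sketch with made-up output names:

```julia
outputs = [:fc1_output, :relu1_output, :softmax_output]
hits = findall(:relu1_output .== outputs)   # => [2]
length(hits) == 1 || error("missing or duplicated output")
outputs[hits[1]]                            # => :relu1_output
```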
""" ^ -broadcast_(::typeof(^), x::SymbolicNode, y::SymbolicNode) = _power(x, y) -broadcast_(::typeof(^), x::SymbolicNode, s::Real) = _power_scalar(x, scalar=MX_float(s)) -broadcast_(::typeof(^), s::Real, x::SymbolicNode) = _rpower_scalar(x, scalar=MX_float(s)) +broadcasted(::typeof(^), x::SymbolicNode, y::SymbolicNode) = _power(x, y) +broadcasted(::typeof(^), x::SymbolicNode, s::Real) = _power_scalar(x, scalar = s) +broadcasted(::typeof(^), s::Real, x::SymbolicNode) = _rpower_scalar(x, scalar = s) +broadcasted(::typeof(Base.literal_pow), ::typeof(^), x::SymbolicNode, ::Val{s}) where {s} = + _power_scalar(x, scalar = s) -broadcast_(::typeof(^), ::Irrational{:e}, x::SymbolicNode) = exp(x) -broadcast_(::typeof(^), x::SymbolicNode, s::Irrational) = +broadcasted(::typeof(^), ::Irrational{:ℯ}, x::SymbolicNode) = exp(x) +broadcasted(::typeof(^), x::SymbolicNode, s::Irrational) = _power_scalar(x, scalar=MX_float(s)) -broadcast_(::typeof(^), s::Irrational, x::SymbolicNode) = +broadcasted(::typeof(^), s::Irrational, x::SymbolicNode) = _rpower_scalar(x, scalar=MX_float(s)) -function _compose!(node :: SymbolicNode; kwargs...) - name = char_p(0) - arg_keys = AbstractString[] +function _compose!(node::SymbolicNode; kwargs...) + name = char_p(C_NULL) + arg_keys = AbstractString[] # FIXME: can it be String[] ? arg_vals = MX_handle[] - for (k,v) in kwargs + for (k, v) in kwargs if k == :name name = string(v) else @@ -685,20 +681,21 @@ function _compose!(node :: SymbolicNode; kwargs...) @mxcall(:MXSymbolCompose, (MX_handle, char_p, MX_uint, Ptr{char_p}, Ptr{MX_handle}), node, name, length(arg_keys), arg_keys, arg_vals) - return node + node end -function _compose!(node :: SymbolicNode, args::SymbolicNode...) +_compose!(node::SymbolicNode, args::SymbolicNode...) = _compose!(node, char_p(0), args...) -end -function _compose!(node :: SymbolicNode, name :: Union{Base.Symbol, char_p}, args::SymbolicNode...) - if isa(name, Base.Symbol); name = string(name); end - arg_keys = Ptr{char_p}(0) +function _compose!(node::SymbolicNode, name::Union{Symbol, char_p}, args::SymbolicNode...) + if name isa Symbol + name = string(name) + end + arg_keys = Ptr{char_p}(C_NULL) arg_vals = MX_handle[args...] @mxcall(:MXSymbolCompose, (MX_handle, char_p, MX_uint, Ptr{char_p}, Ptr{MX_handle}), node, name, length(arg_vals), arg_keys, arg_vals) - return node + node end """ @@ -844,30 +841,26 @@ end node end -function _define_atomic_symbol_creator(name :: String) +function _define_atomic_symbol_creator(name::String) handle = _get_libmx_op_handle(name) f_desc, key_narg = _get_libmx_op_description(name, handle) f_desc *= "* `name::Symbol`: The name of the `SymbolicNode`. (e.g. `:my_symbol`), optional.\n" - f_desc *= "* `attrs::Dict{Symbol, AbstractString}`: The attributes associated with this `SymbolicNode`.\n\n" + f_desc *= "* `attrs::Dict{Symbol,String}`: The attributes associated with this `SymbolicNode`.\n\n" func_name = Symbol(name) + import_expr = _import_expr(func_name) + func_def = quote - function $func_name(::Type{SymbolicNode}, args::SymbolicNode...; kwargs...) - idx = findfirst(x -> x[1] == :name, kwargs) - if idx > 0 - name = kwargs[idx][2] - else - name = "" - end + function $func_name(::Type{SymbolicNode}, args::SymbolicNode...; name = "", kwargs...) 
- # XXX: hacky way of solving the problem that the arguments of `dot` should be swapped + # NOTE: hacky way of solving the problem that the arguments of `dot` should be swapped # See https://github.com/dmlc/MXNet.jl/issues/55 if $name == "dot" args = reverse(args) end - # XXX: hacky way of solving the semantic difference of the axes parameter in Julia + # NOTE: hacky way of solving the semantic difference of the axes parameter in Julia # and in libmxnet. # See https://github.com/dmlc/MXNet.jl/pull/123 if $name == "transpose" @@ -876,8 +869,8 @@ function _define_atomic_symbol_creator(name :: String) param_keys = String[] param_vals = String[] - symbol_kws = Dict{Symbol, SymbolicNode}() - attrs = Dict{Symbol, String}() + symbol_kws = Dict{Symbol,SymbolicNode}() + attrs = Dict{Symbol,String}() $(if key_narg != "" quote @@ -936,23 +929,24 @@ function _define_atomic_symbol_creator(name :: String) end # quote func_def2 = quote - @doc $f_desc -> + @doc $f_desc function $func_name(args::SymbolicNode...; kwargs...) $func_name(SymbolicNode, args...; kwargs...) end # function end # quote return quote + $import_expr $func_def $func_def2 end end macro _import_atomic_symbol_creators() - # XXX: those are operators defined for NDArray, we exclude them here + # NOTE: those are operators defined for NDArray, we exclude them here # because the calling convention for the type signature is not strong # enough to disambiguate the method for NDArray and SymbolicNode - const ignored_ops = ["_set_value", "reshape"] # in lowercase + ignored_ops = ("_set_value", "reshape") # in lowercase op_names = _get_libmx_op_names() func_exprs = map(op_names) do name @@ -966,7 +960,7 @@ macro _import_atomic_symbol_creators() end) end -@_import_atomic_symbol_creators() +@_import_atomic_symbol_creators ################################################################################ # Utility macros to chain up symbols @@ -976,7 +970,7 @@ macro chain(layers) last_layer = nothing function _chain_layer(layer, last_layer) - if isa(last_layer, Void) + if last_layer ≡ nothing return esc(layer) else if @capture(layer, f_(x__)) diff --git a/julia/src/util.jl b/julia/src/util.jl index c07c9ec910fb..a836d3e39bba 100644 --- a/julia/src/util.jl +++ b/julia/src/util.jl @@ -19,7 +19,7 @@ # Dataset related utilities ################################################################################ function get_data_dir() - data_dir = joinpath(Pkg.dir("MXNet"), "data") + data_dir = joinpath(@__DIR__, "..", "data") mkpath(data_dir) data_dir end @@ -32,7 +32,7 @@ function get_mnist_ubyte() :train_label => "train-labels-idx1-ubyte", :test_data => "t10k-images-idx3-ubyte", :test_label => "t10k-labels-idx1-ubyte") - filenames = Dict(map((x) -> x[1] => joinpath(mnist_dir, x[2]), filenames)) + filenames = Dict((x[1] => joinpath(mnist_dir, x[2]) for x ∈ pairs(filenames))) if !all(isfile, values(filenames)) cd(mnist_dir) do mnist_dir = download("http://data.mxnet.io/mxnet/data/mnist.zip", "mnist.zip") @@ -40,7 +40,7 @@ function get_mnist_ubyte() run(`unzip -u $mnist_dir`) catch try - run(pipe(`7z x $mnist_dir`,stdout=DevNull)) + run(pipe(`7z x $mnist_dir`,stdout = devnull)) catch error("Extraction Failed:No extraction program found in path") end @@ -63,7 +63,7 @@ function get_cifar10() run(`unzip -u cifar10.zip`) catch try - run(pipeline(`7z x cifar10.zip`, stdout=DevNull)) + run(pipeline(`7z x cifar10.zip`, stdout = devnull)) catch error("Extraction Failed:No extraction program found in path") end @@ -149,9 +149,8 @@ function 
_get_libmx_op_description(name::String, handle::MX_OpHandle) return desc, key_narg end -function _format_typestring(typestr :: String) - replace(typestr, r"\bSymbol\b", "SymbolicNode") -end +_format_typestring(s::String) = replace(s, r"\bSymbol\b" => "SymbolicNode") + function _format_docstring(narg::Int, arg_names::Ref{char_pp}, arg_types::Ref{char_pp}, arg_descs::Ref{char_pp}, remove_dup::Bool=true) param_keys = Set{String}() @@ -191,7 +190,7 @@ function _getdocdefine(name::String) op = _get_libmx_op_handle(name) str = _get_libmx_op_description(name, op)[1] lines = split(str, '\n') - for m ∈ match.(r"^Defined in .*$", lines) + for m ∈ match.(Ref(r"^Defined in .*$"), lines) m != nothing && return m.match end "" @@ -227,7 +226,7 @@ function _sig_checker() return end - warn(_sig) + @warn(_sig) end end @@ -253,3 +252,20 @@ function _firstarg(sig::Expr) end _firstarg(s::Symbol) = s + +const _import_map = Dict{Symbol,Union{Missing,Module}}( + :diag => LinearAlgebra, + :dot => LinearAlgebra, + :norm => LinearAlgebra, + + :shuffle => Random, + + :mean => Statistics, + + :gamma => missing, +) + +function _import_expr(func_name::Symbol) + mod = get(_import_map, func_name, Base) + isdefined(mod, func_name) ? :(import $(Symbol(mod)): $func_name) : :() +end diff --git a/julia/src/visualize.jl b/julia/src/visualize.jl index b41db7e9e5a7..ddbb3c11fe9b 100644 --- a/julia/src/visualize.jl +++ b/julia/src/visualize.jl @@ -30,14 +30,14 @@ import JSON Returns the graph description in GraphViz `dot` language. """ function to_graphviz(network :: SymbolicNode; title="Network Visualization", input_shapes=nothing) - if !isa(input_shapes, Void) + if !isa(input_shapes, Cvoid) internals = get_internals(network) if isa(input_shapes, Dict) _, out_shapes, _ = infer_shape(internals; input_shapes...) else _, out_shapes, _ = infer_shape(internals, input_shapes...) end - @assert(!isa(out_shapes, Void), "Failed to do shape inference, input shapes are incomplete") + @assert(!isa(out_shapes, Cvoid), "Failed to do shape inference, input shapes are incomplete") shape_dict = Dict(zip(list_outputs(internals), out_shapes)) draw_shape = true else @@ -170,7 +170,7 @@ end function _format_graphviz_attr(io::IOBuffer, attrs) label = get(attrs, :label, nothing) - if isa(label, Void) + if isa(label, Cvoid) print(io, " [") else print(io, " [label=$(_simple_escape(label)),") @@ -195,7 +195,7 @@ function _format_graphviz_attr(io::IOBuffer, attrs) println(io, "];") end function _simple_escape(str) - str = replace(string(str), r"\n", "\\n") + str = replace(string(str), r"\n" => "\\n") return "\"$str\"" end function _format_graphviz_node(io::IOBuffer, name::AbstractString, attrs) diff --git a/julia/test/runtests.jl b/julia/test/runtests.jl index 8b46bfda6726..4e5f273950a7 100644 --- a/julia/test/runtests.jl +++ b/julia/test/runtests.jl @@ -15,29 +15,33 @@ # specific language governing permissions and limitations # under the License. 
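
[Illustrative note, not part of the patch] The new `_import_map`/`_import_expr` machinery exists because several functions the generated operators extend left `Base` in Julia 0.7/1.0: `mean` now lives in Statistics, `dot`/`norm`/`diag` in LinearAlgebra, and `shuffle` in Random, while `gamma` no longer has a stdlib owner (hence the `missing` entry). A generated definition therefore has to import the owning module's function before attaching a method, roughly like this (with `MySym` as a hypothetical stand-in for the node type):

    using Statistics                  # owns `mean` on Julia 0.7+/1.0

    struct MySym end                  # hypothetical stand-in type

    import Statistics: mean           # extend the owner's function...
    mean(x::MySym) = "generated op"   # ...so this adds a method instead of clashing

    @assert mean(MySym()) == "generated op"
    @assert mean([1, 2, 3]) == 2.0    # stdlib behaviour untouched
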
+using Test using MXNet -using Base.Test # run test in the whole directory, latest modified files # are run first, this makes waiting time shorter when writing # or modifying unit-tests function test_dir(dir) - jl_files = sort(filter(x -> ismatch(r".*\.jl$", x), readdir(dir)), by = fn -> stat(joinpath(dir,fn)).mtime) - map(reverse(jl_files)) do file + jl_files = sort( + filter(x -> occursin(r".*\.jl$", x), readdir(dir)), + by = fn -> stat(joinpath(dir, fn)).mtime) + foreach(reverse(jl_files)) do file include("$dir/$file") end end -info("libmxnet version => $(mx.LIB_VERSION)") +@info "libmxnet version => $(mx.LIB_VERSION[])" -include(joinpath(dirname(@__FILE__), "common.jl")) +const BASEDIR = joinpath(@__DIR__, "..") + +include(joinpath(@__DIR__, "common.jl")) @testset "MXNet Test" begin - test_dir(joinpath(dirname(@__FILE__), "unittest")) + test_dir(joinpath(@__DIR__, "unittest")) # run the basic MNIST mlp example if haskey(ENV, "CONTINUOUS_INTEGRATION") @testset "MNIST Test" begin - include(joinpath(Pkg.dir("MXNet"), "examples", "mnist", "mlp-test.jl")) + include(joinpath(BASEDIR, "examples", "mnist", "mlp-test.jl")) end end end diff --git a/julia/test/unittest/autograd.jl b/julia/test/unittest/autograd.jl index 96f275b626af..8209fe7e0327 100644 --- a/julia/test/unittest/autograd.jl +++ b/julia/test/unittest/autograd.jl @@ -17,9 +17,8 @@ module TestAutoGrad -using Base.Test - using MXNet +using Test function checkgradient(f, x, y, ∇) @@ -33,12 +32,12 @@ end # function checkgradient function test_getgrad() - info("AutoGrad::getgrad") + @info("AutoGrad::getgrad") - info("AutoGrad::getgrad::unattached") + @info("AutoGrad::getgrad::unattached") @test nothing == mx.getgrad(mx.zeros(10)) - info("AutoGrad::getgrad::attached") + @info("AutoGrad::getgrad::attached") x = mx.NDArray([1 2; 3 4]) grad = mx.attach_grad!(x) @test eltype(grad) ≡ Int @@ -50,7 +49,7 @@ end function test_mark_variables!() - info("AutoGrad::mark_variables!") + @info("AutoGrad::mark_variables!") x = mx.zeros(4) ẋ = mx.zeros(4) y = mx.zeros(4) @@ -62,13 +61,13 @@ function test_mark_variables!() @test copy(mx.getgrad(x)) == [42, 42, 42, 42] @test copy(mx.getgrad(y)) == [24, 24, 24, 24] - info("AutoGrad::mark_variables!::invalid grad_reqs") + @info("AutoGrad::mark_variables!::invalid grad_reqs") x = mx.zeros(4) y = mx.zeros(4) @test_throws ArgumentError mx.mark_variables!(x, y, :magic) @test_throws ArgumentError mx.mark_variables!([x], [y], [:magic]) - info("AutoGrad::mark_variables!::args length mismatch") + @info("AutoGrad::mark_variables!::args length mismatch") x = mx.zeros(4) y = mx.zeros(4) z = mx.zeros(4) @@ -79,7 +78,7 @@ end function test_record() let x = mx.NDArray([1 2; 3 4]) - info("AutoGrad::record::backward!") + @info("AutoGrad::record::backward!") y = [1 4; 9 16] ∇ = [2 4; 6 8] # gradient is 2x @@ -89,7 +88,7 @@ function test_record() end let x = mx.NDArray([1 2; 3 4]) - info("AutoGrad::record::symbol") + @info("AutoGrad::record::symbol") mx.attach_grad!(x) y = mx.record() do @@ -102,7 +101,7 @@ function test_record() end let x = mx.NDArray([1 2; 3 4]) - info("AutoGrad::record::backward!(retain_graph=true)") + @info("AutoGrad::record::backward!(retain_graph=true)") mx.attach_grad!(x) y = mx.record() do @@ -125,7 +124,7 @@ end # function test_record function test_is_recording() - info("AutoGrad::is_recording") + @info("AutoGrad::is_recording") mx.record() do @test mx.is_recording() end @@ -133,7 +132,7 @@ end # function test_is_recording function test_is_training() - info("AutoGrad::is_training") + 
@info("AutoGrad::is_training") mx.record() do @test mx.is_training() end @@ -145,7 +144,7 @@ end # function test_is_training function test_pause() - info("AutoGrad::pause") + @info("AutoGrad::pause") let x = mx.NDArray([1 2; 3 4]) ∇ = mx.attach_grad!(x) y = mx.record() do @@ -166,7 +165,7 @@ end # function test_pause function test_train_mode() - info("AutoGrad::train_mode") + @info("AutoGrad::train_mode") let x = mx.NDArray(Float32[1 2; 3 4]) y = mx.train_mode() do mx.Dropout(x, p = 1) @@ -178,7 +177,7 @@ end # function test_train_mode function test_predict_mode() - info("AutoGrad::predict_mode") + @info("AutoGrad::predict_mode") let x = mx.NDArray(Float32[1 2; 3 4]) y = mx.predict_mode() do mx.Dropout(x, p = 1) @@ -190,7 +189,7 @@ end # function test_train_mode function test_backward!() - info("AutoGrad::backward!::with head_grad") + @info("AutoGrad::backward!::with head_grad") let x = mx.NDArray(Float32[1 2; 3 4]), A = Float32[.2 .4; 0 .1] ∇ = mx.attach_grad!(x) y = mx.record() do @@ -200,7 +199,7 @@ function test_backward!() @test copy(∇) ≈ [2 4; 6 8] .* A end - info("AutoGrad::backward!::with head_grads") + @info("AutoGrad::backward!::with head_grads") let x = mx.NDArray(Float32[1 2; 3 4]) ∇ = mx.attach_grad!(x) mx.record() do @@ -216,7 +215,7 @@ function test_backward!() @test copy(∇) ≈ ans end - info("AutoGrad::backward!::ArgumentError") + @info("AutoGrad::backward!::ArgumentError") let x = mx.NDArray([42]) @test_throws ArgumentError mx.backward!([x], [24]) end @@ -224,7 +223,7 @@ end # function test_backward! function test_symbol() - info("AutoGrad::symbol") + @info("AutoGrad::symbol") let x = mx.zeros(4) mx.attach_grad!(x) @@ -234,9 +233,9 @@ end function test_add() - info("AutoGrad::add") + @info("AutoGrad::add") - info("AutoGrad::add::x") + @info("AutoGrad::add::x") let x = mx.NDArray([1 2; 3 4]) y = [1 2; 3 4] ∇ = [1 1; 1 1] # gradient is 1 @@ -245,7 +244,7 @@ function test_add() end end - info("AutoGrad::add::+x") + @info("AutoGrad::add::+x") let x = mx.NDArray([1 2; 3 4]) y = [1 2; 3 4] ∇ = [1 1; 1 1] # gradient is 1 @@ -254,7 +253,7 @@ function test_add() end end - info("AutoGrad::add::x .+ 42") + @info("AutoGrad::add::x .+ 42") let x = mx.NDArray([1 2; 3 4]) y = [43 44; 45 46] ∇ = [1 1; 1 1] # gradient is 1 @@ -263,7 +262,7 @@ function test_add() end end - info("AutoGrad::add::42 .+ x") + @info("AutoGrad::add::42 .+ x") let x = mx.NDArray([1 2; 3 4]) y = [43 44; 45 46] ∇ = [1 1; 1 1] @@ -272,14 +271,14 @@ function test_add() end end - # TODO: info("AutoGrad::add::x .+ y") + # TODO: @info("AutoGrad::add::x .+ y") end # function test_add function test_sub() - info("AutoGrad::sub") + @info("AutoGrad::sub") - info("AutoGrad::sub::-x") + @info("AutoGrad::sub::-x") let x = mx.NDArray([1 2; 3 4]) y = [-1 -2; -3 -4] ∇ = [-1 -1; -1 -1] # gradient is -1 @@ -288,7 +287,7 @@ function test_sub() end end - info("AutoGrad::sub::x .- 42") + @info("AutoGrad::sub::x .- 42") let x = mx.NDArray([1 2; 3 4]) y = [-41 -40; -39 -38] ∇ = [1 1; 1 1] @@ -297,7 +296,7 @@ function test_sub() end end - info("AutoGrad::sub::42 .- x") + @info("AutoGrad::sub::42 .- x") let x = mx.NDArray([1 2; 3 4]) y = [41 40; 39 38] ∇ = -[1 1; 1 1] @@ -306,14 +305,14 @@ function test_sub() end end - # TODO: info("AutoGrad::add::x .- y") + # TODO: @info("AutoGrad::sub::x .- y") end # function test_sub function test_mul() - info("AutoGrad::mul") + @info("AutoGrad::mul") - info("AutoGrad::mul::2x .* x") + @info("AutoGrad::mul::2x .* x") let x = mx.NDArray([1 2; 3 4]) y = [2 8; 18 32] ∇ = [4 8; 12 16] # 4x @@ -322,7 +321,7 @@ 
function test_mul() end end - info("AutoGrad::mul::x * 2 .* x") + @info("AutoGrad::mul::x * 2 .* x") let x = mx.NDArray([1 2; 3 4]) y = [2 8; 18 32] ∇ = [4 8; 12 16] # 4x @@ -334,9 +333,9 @@ end function test_div() - info("AutoGrad::div") + @info("AutoGrad::div") - info("AutoGrad::div::x ./ 2") + @info("AutoGrad::div::x ./ 2") let x = mx.NDArray(Float32[1 2; 3 4]) y = Float32[.5 1; 1.5 2] ∇ = [.5 .5; .5 .5] @@ -345,7 +344,7 @@ function test_div() end end - info("AutoGrad::rdiv::2 ./ x") + @info("AutoGrad::rdiv::2 ./ x") let A = Float32[1 2; 3 4], x = mx.NDArray(A) y = 2 ./ A ∇ = @. -2 / A^2 # -2 / x² @@ -357,9 +356,9 @@ end # function test_div function test_power() - info("AutoGrad::power") + @info("AutoGrad::power") - info("AutoGrad::power::x.^3") + @info("AutoGrad::power::x.^3") let A = Float32[1 2; 3 4] x = mx.NDArray(A) y = A.^3 @@ -369,7 +368,7 @@ function test_power() end end - info("AutoGrad::power::x.^.5") + @info("AutoGrad::power::x.^.5") let A = Float32[1 2; 3 4] x = mx.NDArray(A) y = A.^.5 diff --git a/julia/test/unittest/bind.jl b/julia/test/unittest/bind.jl index 538b556bf474..abaca884bab1 100644 --- a/julia/test/unittest/bind.jl +++ b/julia/test/unittest/bind.jl @@ -17,7 +17,7 @@ module TestBind using MXNet -using Base.Test +using Test using ..Main: rand_dims @@ -26,7 +26,7 @@ using ..Main: rand_dims ################################################################################ function test_arithmetic(::Type{T}, uf, gf) where T <: mx.DType shape = rand_dims() - info("Bind::arithmetic::$T::$uf::dims = $shape") + @info "Bind::arithmetic::$T::$uf::dims = $shape" lhs = mx.Variable(:lhs) rhs = mx.Variable(:rhs) @@ -77,7 +77,7 @@ function test_arithmetic() test_arithmetic(T, (x,y) -> x .- y, (g,x,y) -> (g,-g)) test_arithmetic(T, (x,y) -> x .* y, (g,x,y) -> (y.*g, x.*g)) if T <: Integer || T == Float16 - warn("Not running division test for $T") + @warn "Not running division test for $T" else test_arithmetic(T, (x,y) -> x ./ y, (g,x,y) -> (g ./ y, -x .* g ./ (y.^2))) end diff --git a/julia/test/unittest/io.jl b/julia/test/unittest/io.jl index 81f2ff79a83b..cf8d8368d212 100644 --- a/julia/test/unittest/io.jl +++ b/julia/test/unittest/io.jl @@ -18,12 +18,12 @@ module TestIO using MXNet -using Base.Test +using Test using ..Main: rand_dims function test_mnist() - info("IO::MNIST") + @info "IO::MNIST" filenames = mx.get_mnist_ubyte() batch_size = 10 @@ -76,7 +76,7 @@ function test_arrays_impl(data::Vector, label::Vector, provider::mx.ArrayDataPro @test batch_size == d2[end] end - info("IO::Array::#data=$(length(data)),#label=$(length(label)),batch_size=$batch_size") + @info "IO::Array::#data=$(length(data)),#label=$(length(label)),batch_size=$batch_size" for (idx, batch) in zip(idx_all, provider) data_batch = [x[[Colon() for i=1:ndims(x)-1]..., idx:min(idx+batch_size-1,sample_count)] for x in data] data_get = mx.get_data(provider, batch) @@ -111,7 +111,7 @@ function test_arrays() end function test_arrays_shuffle() - info("IO::Array::shuffle") + @info "IO::Array::shuffle" sample_count = 15 batch_size = 4 diff --git a/julia/test/unittest/kvstore.jl b/julia/test/unittest/kvstore.jl index 96e1643d8d83..503a1fdbd533 100644 --- a/julia/test/unittest/kvstore.jl +++ b/julia/test/unittest/kvstore.jl @@ -17,7 +17,7 @@ module TestKVStore using MXNet -using Base.Test +using Test using ..Main: rand_dims @@ -34,7 +34,7 @@ function init_kv() end function test_kv_basic() - info("KVStore::basic") + @info("KVStore::basic") kv = init_kv() @test mx.get_type(kv) == :local @@ -43,7 +43,7 @@ function 
test_kv_basic() end function test_single_kv_pair() - info("KVStore::single") + @info("KVStore::single") kv = init_kv() mx.push!(kv, 3, mx.ones(SHAPE)) @@ -53,7 +53,7 @@ function test_single_kv_pair() end function test_aggregator() - info("KVStore::aggregator") + @info("KVStore::aggregator") kv = init_kv() @@ -64,7 +64,7 @@ function test_aggregator() mx.push!(kv, 3, vals) mx.pull!(kv, 3, vals) for v in vals - @test maximum(abs.(copy(v)) - num_devs) == 0 + @test maximum(abs.(copy(v)) .- num_devs) == 0 end # list @@ -74,7 +74,7 @@ function test_aggregator() for vv in vals for v in vv - @test maximum(abs.(copy(v)) - 2 * num_devs) == 0 + @test maximum(abs.(copy(v)) .- 2 * num_devs) == 0 end end end @@ -95,7 +95,7 @@ function check_setupdater!(f) end # function check_setupdater! function test_setupdater!() - info("KVStore::setupdater!") + @info("KVStore::setupdater!") f(key, Δ, x) = @mx.inplace x += 2Δ g(key, Δ, x) = (x[:] += 2Δ) diff --git a/julia/test/unittest/metric.jl b/julia/test/unittest/metric.jl index 32c4538b608c..05e4dbda47f4 100644 --- a/julia/test/unittest/metric.jl +++ b/julia/test/unittest/metric.jl @@ -18,7 +18,7 @@ module TestMetric using MXNet -using Base.Test +using Test ################################################################################ # Supporting functions @@ -58,7 +58,7 @@ end ################################################################################ function test_ace() - info("EvalMetric::ACE") + @info "EvalMetric::ACE" n_categories = 4 n_observations = 100 labels = convert(Vector{Float32}, rand(0:(n_categories - 1), n_observations)) # MXNet uses Float32 @@ -72,7 +72,7 @@ end function test_nmse() - info("EvalMetric::NMSE") + @info "EvalMetric::NMSE" @testset "EvalMetric::NMSE::update!" begin metric = mx.NMSE() diff --git a/julia/test/unittest/model.jl b/julia/test/unittest/model.jl index dbe97093dc68..387a0cd555ab 100644 --- a/julia/test/unittest/model.jl +++ b/julia/test/unittest/model.jl @@ -17,19 +17,19 @@ module TestModel -using Base.Test +using Test using MXNet function test_feedforward() - info("Model::FeedForward::constructor") + @info("Model::FeedForward::constructor") let x = @mx.var x m = mx.FeedForward(x) @test m.arch === x @test length(m.ctx) == 1 end - info("Model::FeedForward::constructor::keyword context") + @info("Model::FeedForward::constructor::keyword context") let x = @mx.var x m = mx.FeedForward(x, context = mx.cpu()) @test m.arch === x diff --git a/julia/test/unittest/name.jl b/julia/test/unittest/name.jl index eea73efb43c9..1099ec4a7df5 100644 --- a/julia/test/unittest/name.jl +++ b/julia/test/unittest/name.jl @@ -17,10 +17,10 @@ module TestNameManager using MXNet -using Base.Test +using Test function test_default() - info("NameManager::default") + @info("NameManager::default") name = :_____aaaaa_____ @test get!(mx.DEFAULT_NAME_MANAGER, name, "") == name @@ -32,7 +32,7 @@ function test_default() end function test_prefix() - info("NameManager::prefix") + @info("NameManager::prefix") name = :_____bbbbb_____ prefix = :_____foobar_____ diff --git a/julia/test/unittest/ndarray.jl b/julia/test/unittest/ndarray.jl index 3bacbb009dfc..9ca4ba206027 100644 --- a/julia/test/unittest/ndarray.jl +++ b/julia/test/unittest/ndarray.jl @@ -18,7 +18,9 @@ module TestNDArray using MXNet -using Base.Test +using Statistics +using LinearAlgebra +using Test using ..Main: rand_dims @@ -33,7 +35,7 @@ function rand_tensors(::Type{T}, dims::NTuple{N,Int}) where {N,T} end function test_constructor() - info("NDArray::NDArray(x::AbstractArray)") + 
@info("NDArray::NDArray(x::AbstractArray)") function check_absarray(x) y = mx.NDArray(x) @test ndims(x) == ndims(y) @@ -44,7 +46,7 @@ function test_constructor() check_absarray(1:10) check_absarray(1.0:10) - info("NDArray::NDArray(Type, AbstractArray)") + @info("NDArray::NDArray(Type, AbstractArray)") let x = mx.NDArray(Float32, [1, 2, 3]) @test eltype(x) == Float32 @@ -59,7 +61,7 @@ end # function test_constructor function test_ones_zeros_like() - info("NDArray::Base.zeros") + @info("NDArray::Base.zeros") let x = mx.rand(1, 3, 2, 4, low = 1, high = 10) y = zeros(x) @test sum(copy(y)) == 0 @@ -68,7 +70,7 @@ function test_ones_zeros_like() @test sum(copy(y)) == 0 end - info("NDArray::Base.ones") + @info("NDArray::Base.ones") let x = mx.rand(1, 3, 2, 4, low = 1, high = 10) y = ones(x) @test sum(copy(y)) == 1 * 3 * 2 * 4 @@ -83,7 +85,7 @@ function test_copy() dims = rand_dims() tensor = rand(mx.MX_float, dims) - info("NDArray::copy::dims = $dims") + @info("NDArray::copy::dims = $dims") # copy to NDArray and back array = copy(tensor, mx.cpu()) @@ -95,7 +97,7 @@ function test_copy() tensor2 = copy(array2) @test tensor ≈ tensor2 - info("NDArray::copy::AbstractArray") + @info("NDArray::copy::AbstractArray") let x = copy(1:4, mx.cpu()) @test eltype(x) == Int @test copy(x) == [1, 2, 3, 4] @@ -106,7 +108,7 @@ function test_copy() @test copy(x) ≈ [1., 2, 3, 4] end - info("NDArray::copy!::AbstractArray") + @info("NDArray::copy!::AbstractArray") let x = mx.zeros(4) copy!(x, 1:4) @@ -117,7 +119,7 @@ function test_copy() end function test_deepcopy() - info("NDArray::deepcopy") + @info("NDArray::deepcopy") x = mx.zeros(2, 5) y = deepcopy(x) @@ -129,7 +131,7 @@ function test_assign() dims = rand_dims() tensor = rand(mx.MX_float, dims) - info("NDArray::assign::dims = $dims") + @info("NDArray::assign::dims = $dims") # Julia Array -> NDArray assignment array = mx.empty(size(tensor)) @@ -145,21 +147,21 @@ function test_assign() # scalar -> NDArray assignment scalar = rand() array2[:] = scalar - @test zeros(size(tensor)) + scalar ≈ copy(array2) + @test zeros(size(tensor)) .+ scalar ≈ copy(array2) scalar = rand(Float16) array2[:] = scalar - @test zeros(size(tensor)) + scalar ≈ copy(array2) + @test zeros(size(tensor)) .+ scalar ≈ copy(array2) scalar = rand(Float64) array2[:] = scalar array3[:] = scalar - @test zeros(size(tensor)) + scalar ≈ copy(array2) - @test zeros(Float16, size(tensor)) + scalar ≈ copy(array3) + @test zeros(size(tensor)) .+ scalar ≈ copy(array2) + @test zeros(Float16, size(tensor)) .+ scalar ≈ copy(array3) # NDArray -> NDArray assignment array[:] = array2 - @test zeros(size(tensor)) + scalar ≈ copy(array) + @test zeros(size(tensor)) .+ scalar ≈ copy(array) end function test_slice() @@ -170,7 +172,7 @@ function test_slice() end function test_linear_idx() - info("NDArray::getindex::linear indexing") + @info("NDArray::getindex::linear indexing") let A = reshape(1:30, 3, 10) x = mx.NDArray(A) @@ -201,7 +203,7 @@ function test_linear_idx() @test copy(x[14]) == [14] end - info("NDArray::setindex!::linear indexing") + @info("NDArray::setindex!::linear indexing") let A = reshape(1:24, 3, 2, 4) x = mx.NDArray(A) @@ -217,7 +219,7 @@ function test_linear_idx() @test copy(x[24]) == [42] end - info("NDArray::setindex!::type convert") + @info("NDArray::setindex!::type convert") let x = NDArray([1, 2, 3]) @test eltype(x) == Int @@ -227,7 +229,7 @@ function test_linear_idx() end # function test_linear_idx function test_first() - info("NDArray::first") + @info("NDArray::first") let A = reshape(1:30, 3, 10) x = 
mx.NDArray(A) @@ -239,12 +241,12 @@ function test_first() end end # function test_first -function test_endof() - info("NDArray::endof") +function test_lastindex() + @info("NDArray::lastindex") let A = [1 2; 3 4; 5 6], x = mx.NDArray(A) - @test endof(A) == endof(x) + @test lastindex(A) == lastindex(x) end -end # function test_endof +end # function test_lastindex function test_cat() function check_cat(f, A, B = 2A) @@ -270,39 +272,39 @@ function test_cat() end let A = [1, 2, 3, 4] - info("NDArray::hcat::1D") + @info("NDArray::hcat::1D") check_cat(:hcat, A) - info("NDArray::vcat::1D") + @info("NDArray::vcat::1D") check_cat(:vcat, A) end let A = [1 2; 3 4] - info("NDArray::hcat::2D") + @info("NDArray::hcat::2D") check_cat(:hcat, A) - info("NDArray::vcat::2D") + @info("NDArray::vcat::2D") check_cat(:vcat, A) end let A = rand(4, 3, 2) - info("NDArray::hcat::3D") + @info("NDArray::hcat::3D") check_cat(:hcat, A) - info("NDArray::vcat::3D") + @info("NDArray::vcat::3D") check_cat(:vcat, A) end let A = rand(4, 3, 2, 2) - info("NDArray::hcat::4D") + @info("NDArray::hcat::4D") check_cat(:hcat, A) - info("NDArray::vcat::4D") + @info("NDArray::vcat::4D") check_cat(:vcat, A) end let A = [1, 2, 3, 4] - info("NDArray::cat::3D/1D") + @info("NDArray::cat::3D/1D") check_cat(:vcat, reshape(A, 4, 1, 1), 2A) end end # function test_cat @@ -313,46 +315,45 @@ function test_plus() t2, a2 = rand_tensors(dims) t3, a3 = rand_tensors(dims) - info("NDArray::plus::dims = $dims") + @info("NDArray::plus::dims = $dims") - @test t1 + t2 ≈ copy(a1 + a2) @test t1 .+ t2 ≈ copy(a1 .+ a2) - @test t1 + t2 + t3 ≈ copy(a1 + a2 + a3) + @test t1 .+ t2 .+ t3 ≈ copy(a1 .+ a2 .+ a3) # test inplace += operation a0 = a1 # keep a reference to a1 @mx.inplace a1 += a2 # perform inplace += @test a0 == a1 # make sure they are still the same object @test copy(a0) ≈ copy(a1) - @test copy(a1) ≈ t1 + t2 + @test copy(a1) ≈ t1 .+ t2 # test scalar scalar = rand() - @test t3 + scalar ≈ copy(a3 + scalar) - @test t2 + scalar + t3 ≈ copy(a2 + scalar + a3) + @test t3 .+ scalar ≈ copy(a3 .+ scalar) + @test t2 .+ scalar .+ t3 ≈ copy(a2 .+ scalar .+ a3) # test small and large scalar t4 = zeros(Float32, dims) a4 = copy(t4, mx.cpu()) scalar_small = 1e-8 scalar_large = 1e8 - @test t4 + scalar_small ≈ copy(a4 .+ scalar_small) - @test t4 + scalar_large ≈ copy(a4 .+ scalar_large) + @test t4 .+ scalar_small ≈ copy(a4 .+ scalar_small) + @test t4 .+ scalar_large ≈ copy(a4 .+ scalar_large) t5 = zeros(Float64, dims) a5 = copy(t5, mx.cpu()) scalar_small = 1e-8 scalar_large = 1e8 - @test t5 + scalar_small ≈ copy(a5 .+ scalar_small) - @test t5 + scalar_large ≈ copy(a5 .+ scalar_large) + @test t5 .+ scalar_small ≈ copy(a5 .+ scalar_small) + @test t5 .+ scalar_large ≈ copy(a5 .+ scalar_large) t6 = zeros(Float16, dims) a6 = copy(t6, mx.cpu()) scalar_small = Float16(1e-5) scalar_large = Float16(1e4) - @test t6 + scalar_small ≈ copy(a6 .+ scalar_small) - @test t6 + scalar_large ≈ copy(a6 .+ scalar_large) + @test t6 .+ scalar_small ≈ copy(a6 .+ scalar_small) + @test t6 .+ scalar_large ≈ copy(a6 .+ scalar_large) let x = mx.NDArray([1 2; 3 4]), y = mx.NDArray([1 1; 1 1]) @test copy(42 .+ x) == [43 44; 45 46] @@ -360,7 +361,7 @@ function test_plus() @test copy(0 .+ x .+ y .+ 41) == [43 44; 45 46] end - info("NDArray::plus::scalar::type convert") + @info("NDArray::plus::scalar::type convert") let x = mx.NDArray([1, 2, 3]) y = x .+ 0.5 @test copy(y) == copy(x) @@ -369,7 +370,7 @@ function test_plus() @test copy(y) == [3, 4, 5] end - info("NDArray::broadcast_add") + 
@info("NDArray::broadcast_add") let A = [1 2 3; 4 5 6] @@ -392,9 +393,8 @@ function test_minus() t1, a1 = rand_tensors(dims) t2, a2 = rand_tensors(dims) - info("NDArray::minus::dims = $dims") + @info("NDArray::minus::dims = $dims") - @test t1 - t2 ≈ copy(a1 - a2) @test t1 .- t2 ≈ copy(a1 .- a2) @test -t1 ≈ copy(-a1) @@ -409,40 +409,40 @@ function test_minus() @test a0 == a1 # make sure they are still the same object @test a0.handle == a1.handle @test copy(a0) ≈ copy(a1) - @test copy(a1) ≈ t1 - t2 + @test copy(a1) ≈ t1 .- t2 # test scalar scalar = rand() - @test t2 - scalar ≈ copy(a2 - scalar) + @test t2 .- scalar ≈ copy(a2 .- scalar) # test small and large scalar t4 = zeros(Float32, dims) a4 = copy(t4, mx.cpu()) scalar_small = 1e-8 scalar_large = 1e8 - @test t4 - scalar_small ≈ copy(a4 .- scalar_small) - @test t4 - scalar_large ≈ copy(a4 .- scalar_large) + @test t4 .- scalar_small ≈ copy(a4 .- scalar_small) + @test t4 .- scalar_large ≈ copy(a4 .- scalar_large) t5 = zeros(Float64, dims) a5 = copy(t5, mx.cpu()) scalar_small = 1e-8 scalar_large = 1e8 - @test t5 - scalar_small ≈ copy(a5 .- scalar_small) - @test t5 - scalar_large ≈ copy(a5 .- scalar_large) + @test t5 .- scalar_small ≈ copy(a5 .- scalar_small) + @test t5 .- scalar_large ≈ copy(a5 .- scalar_large) t6 = zeros(Float16, dims) a6 = copy(t6, mx.cpu()) scalar_small = Float16(1e-5) scalar_large = Float16(1e4) - @test t6 - scalar_small ≈ copy(a6 .- scalar_small) - @test t6 - scalar_large ≈ copy(a6 .- scalar_large) + @test t6 .- scalar_small ≈ copy(a6 .- scalar_small) + @test t6 .- scalar_large ≈ copy(a6 .- scalar_large) - info("NDArray::minus::scalar::type convert") + @info("NDArray::minus::scalar::type convert") let x = mx.NDArray([1, 2, 3]) @test copy(x .- π) ≈ [-2, -1, 0] end - info("NDArray::broadcast_minus") + @info("NDArray::broadcast_minus") let A = [1 2 3; 4 5 6] @@ -458,6 +458,18 @@ function test_minus() # @inplace x .-= y # @test copy(x) == A .- B end + + @info("NDArray::scalar::rminus") + let + A = [1 2 3; + 4 5 6] + B = 10 .- A + + x = NDArray(A) + y = 10 .- x + + @test copy(y) == B + end end function test_mul() @@ -466,9 +478,9 @@ function test_mul() t2, a2 = rand_tensors(dims) t3, a3 = rand_tensors(dims) - info("NDArray::mul::dims = $dims") + @info("NDArray::mul::dims = $dims") - @test t1 .* t2 ≈ copy(a1.*a2) + @test t1 .* t2 ≈ copy(a1 .* a2) # test inplace .*= operation a0 = a1 # keep a reference to a1 @@ -480,7 +492,7 @@ function test_mul() # test scalar scalar = mx.MX_float(rand()) - @test t3 * scalar ≈ copy(a3 .* scalar) + @test t3 .* scalar ≈ copy(a3 .* scalar) # test small and large scalar t4, a4 = rand_tensors(Float32, dims) @@ -499,26 +511,26 @@ function test_mul() scalar_small = Float16(1e-5) @test t6 * scalar_small ≈ copy(a6 .* scalar_small) - info("NDArray::mul::matrix multiplication") + @info("NDArray::mul::matrix multiplication") let x = mx.NDArray([1. 2]) y = x' * x @test copy(y) == [1. 2; 2 4] end - info("NDArray::mul::elementwise::issue 253") + @info("NDArray::mul::elementwise::issue 253") let x = mx.NDArray([1. 2]) y = x .* x @test copy(y) == [1. 4.] 
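
[Illustrative note, not part of the patch] The blanket `+`/`-` to `.+`/`.-` edits in `test_plus` and `test_minus` are not cosmetic: Julia 0.7/1.0 dropped implicit scalar-array `+` and `-`, so the undotted forms the old tests relied on now throw MethodError (scalar `*` and `/` on arrays still work; the `.*` edits are for consistency). For plain Julia arrays, no MXNet involved:

    A = [1 2; 3 4]

    A .+ 1      # [2 3; 4 5]   -- explicit broadcast, required on 0.7+/1.0
    10 .- A     # [9 8; 7 6]   -- scalar on the left, as in the new rminus test
    A * 0.5     # still fine: scalar multiplication kept its undotted method

    # A + 1     # MethodError on Julia 1.0: array + scalar was removed
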
end - info("NDArray::mul::scalar::type convert") + @info("NDArray::mul::scalar::type convert") let x = mx.NDArray([1, 2, 3]) y = x .* π @test eltype(x) == Int @test copy(y) == [3, 6, 9] end - info("NDArray::broadcast_mul") + @info("NDArray::broadcast_mul") let A = [1 2 3; 4 5 6] @@ -541,7 +553,7 @@ function test_div() t1, a1 = rand_tensors(dims) t2, a2 = rand_tensors(dims) - info("NDArray::div::dims = $dims") + @info("NDArray::div::dims = $dims") t2 .+= 2 # avoid numerical instability @mx.inplace a2 .+= 2 @@ -576,7 +588,7 @@ function test_div() scalar_large = 1e4 @test t6 ./ scalar_large ≈ copy(a6 ./ scalar_large) - info("NDArray::div::scalar::type convert") + @info("NDArray::div::scalar::type convert") let x = mx.NDArray([1, 2, 3]) y = x ./ 1.1 @test eltype(y) == Int @@ -589,7 +601,7 @@ function test_div() @test_throws AssertionError x ./ 0.5 end - info("NDArray::broadcast_div") + @info("NDArray::broadcast_div") let A = Float32[1 2 3; 4 5 6] @@ -607,37 +619,36 @@ function test_div() end end - function test_rdiv() - info("NDArray::rdiv") + @info("NDArray::rdiv") - info("NDArray::rdiv::Inf16") + @info("NDArray::rdiv::Inf16") let x = 1 ./ mx.zeros(Float16, 4) @test copy(x) == [Inf16, Inf16, Inf16, Inf16] end - info("NDArray::rdiv::Inf32") + @info("NDArray::rdiv::Inf32") let x = 1 ./ mx.zeros(Float32, 4) @test copy(x) == [Inf32, Inf32, Inf32, Inf32] end - info("NDArray::rdiv::Inf64") + @info("NDArray::rdiv::Inf64") let x = 1 ./ mx.zeros(Float64, 4) @test copy(x) == [Inf64, Inf64, Inf64, Inf64] end - info("NDArray::rdiv::Int") + @info("NDArray::rdiv::Int") let x = 1 ./ mx.NDArray([1 2; 3 4]) @test copy(x) == [1 0; 0 0] end - info("NDArray::rdiv::Float32") + @info("NDArray::rdiv::Float32") let x = 1 ./ mx.NDArray(Float32[1 2; 3 4]) y = 1 ./ Float32[1 2; 3 4] @test copy(x) ≈ y end - info("NDArray::rdiv::type convert") + @info("NDArray::rdiv::type convert") let x = mx.NDArray([1, 2, 3]) y = 5.5 ./ x @test eltype(y) == Int # this differs from julia @@ -645,11 +656,10 @@ function test_rdiv() end end # function test_rdiv - function test_mod() - info("NDArray::mod") - const A = [1 2; 3 4] - const B = [1 1; 3 3] + @info("NDArray::mod") + A = [1 2; 3 4] + B = [1 1; 3 3] let x = NDArray(A), y = NDArray(B) C = A .% B @@ -662,21 +672,21 @@ function test_mod() @test copy(z) ≈ D end - info("NDArray::mod::scalar") + @info("NDArray::mod::scalar") let x = NDArray(A) C = A .% 2 y = x .% 2 @test copy(y) ≈ C end - info("NDArray::rmod") + @info("NDArray::rmod") let x = NDArray(A) C = 11 .% A y = 11 .% x @test copy(y) ≈ C end - info("NDArray::mod_from!") + @info("NDArray::mod_from!") let x = NDArray(A) y = NDArray(B) @@ -694,7 +704,7 @@ function test_mod() @test copy(y) ≈ C end - info("NDArray::mod_from!::scalar") + @info("NDArray::mod_from!::scalar") let x = NDArray(A) C = A .% 2 @@ -702,7 +712,7 @@ function test_mod() @test copy(x) ≈ C end - info("NDArray::rmod_from!") + @info("NDArray::rmod_from!") let x = NDArray(A) C = 11 .% A @@ -710,7 +720,7 @@ function test_mod() @test copy(x) ≈ C end - info("NDArray::mod_from!::writable") + @info("NDArray::mod_from!::writable") let x = NDArray(A) y = NDArray(B) @@ -722,7 +732,7 @@ function test_mod() @test_throws AssertionError mx.rmod_from!(2, x) end - info("NDArray::mod::inplace") + @info("NDArray::mod::inplace") let x = NDArray(A) y = NDArray(B) @@ -731,7 +741,7 @@ function test_mod() @test copy(x) ≈ C end - info("NDArray::broadcast_mod") + @info("NDArray::broadcast_mod") let A = [1 2 3; 4 5 6] @@ -749,13 +759,12 @@ function test_mod() end end # function test_mod - 
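
[Illustrative note, not part of the patch] Dropping `const` from the fixtures inside `test_mod` is likewise required rather than stylistic: Julia 1.0 rejects `const` declarations on local variables, so the 0.6-era `const A = ...` inside a function body no longer compiles. A small sketch in plain Julia:

    function demo_mod()
        # const A = [1 2; 3 4]   # Julia 1.0: error, `const` not allowed on locals
        A = [1 2; 3 4]           # plain local binding instead
        A .% 2
    end

    @assert demo_mod() == [1 0; 1 0]
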
function test_gd() dims = rand_dims() tw, aw = rand_tensors(dims) tg, ag = rand_tensors(dims) - info("NDArray::gd::dims = $dims") + @info("NDArray::gd::dims = $dims") lr = rand() wd = rand() @@ -765,10 +774,9 @@ function test_gd() @test copy(aw) ≈ tw end - function test_saveload() n_arrays = 5 - info("NDArray::saveload::n_arrays = $n_arrays") + @info("NDArray::saveload::n_arrays = $n_arrays") fname = tempname() # save and load a single array @@ -807,7 +815,7 @@ end function test_clip() dims = rand_dims() - info("NDArray::clip::dims = $dims") + @info("NDArray::clip::dims = $dims") j_array, nd_array = rand_tensors(dims) clip_up = maximum(abs.(j_array)) / 2 @@ -819,7 +827,7 @@ function test_clip() @test all(clip_down .<= copy(clipped) .<= clip_up) - info("NDArray::clip!") + @info("NDArray::clip!") let x = NDArray(1.0:20) clip!(x, 5, 15) @@ -828,91 +836,91 @@ function test_clip() end function test_power() - info("NDArray::power") + @info("NDArray::power") - info("NDArray::power::Int::x.^n") + @info("NDArray::power::Int::x .^ n") let x = mx.NDArray([1 2; 3 4]) @test eltype(x) == Int - @test copy(x.^-1) == [1 0; 0 0] - @test copy(x.^0) == [1 1; 1 1] - @test copy(x.^1) == [1 2; 3 4] - @test copy(x.^1.1) == [1 2; 3 4] - @test copy(x.^2) == [1 4; 9 16] - @test copy(x.^2.9) == [1 4; 9 16] - @test copy(x.^3) == [1 8; 27 64] + @test copy(x .^ -1) == [1 0; 0 0] + @test copy(x .^ 0) == [1 1; 1 1] + @test copy(x .^ 1) == [1 2; 3 4] + @test copy(x .^ 1.1) == [1 2; 3 4] + @test copy(x .^ 2) == [1 4; 9 16] + @test copy(x .^ 2.9) == [1 4; 9 16] + @test copy(x .^ 3) == [1 8; 27 64] end - info("NDArray::power::Int::n.^x") + @info("NDArray::power::Int::n .^ x") let x = mx.NDArray([1 2; 3 4]) @test eltype(x) == Int - @test copy(0.^x) == [0 0; 0 0] - @test copy(1.^x) == [1 1; 1 1] - @test copy(1.1.^x) == [1 1; 1 1] - @test copy(2.^x) == [2 4; 8 16] - @test copy(2.9.^x) == [2 4; 8 16] - @test copy(3.^x) == [3 9; 27 81] + @test copy(0 .^ x) == [0 0; 0 0] + @test copy(1 .^ x) == [1 1; 1 1] + @test copy(1.1 .^ x) == [1 1; 1 1] + @test copy(2 .^ x) == [2 4; 8 16] + @test copy(2.9 .^ x) == [2 4; 8 16] + @test copy(3 .^ x) == [3 9; 27 81] end - info("NDArray::power::Int::x.^y") + @info("NDArray::power::Int::x .^ y") let x = mx.NDArray([1 2; 3 4]), y = mx.NDArray([2 2; 2 2]) @test eltype(x) == Int @test eltype(y) == Int - @test copy(x.^y) == [1 4; 9 16] - @test copy(y.^x) == [2 4; 8 16] + @test copy(x .^ y) == [1 4; 9 16] + @test copy(y .^ x) == [2 4; 8 16] end - info("NDArray::power::Float32::x.^n") + @info("NDArray::power::Float32::x .^ n") let x = mx.NDArray(Float32[1 2; 3 4]), A = Float32[1 2; 3 4] @test eltype(x) == Float32 - @test copy(x.^0) == Float32[1 1; 1 1] - @test copy(x.^1) == Float32[1 2; 3 4] - @test copy(x.^2) == Float32[1 4; 9 16] - @test copy(x.^3) == Float32[1 8; 27 64] + @test copy(x .^ 0) == Float32[1 1; 1 1] + @test copy(x .^ 1) == Float32[1 2; 3 4] + @test copy(x .^ 2) == Float32[1 4; 9 16] + @test copy(x .^ 3) == Float32[1 8; 27 64] - @test copy(x.^-1) ≈ A.^-1 - @test copy(x.^1.1) ≈ A.^1.1 - @test copy(x.^2.9) ≈ A.^2.9 + @test copy(x .^ -1) ≈ A .^ -1 + @test copy(x .^ 1.1) ≈ A .^ 1.1 + @test copy(x .^ 2.9) ≈ A .^ 2.9 end - info("NDArray::power::Float32::n.^x") + @info("NDArray::power::Float32::n .^ x") let x = mx.NDArray(Float32[1 2; 3 4]), A = Float32[1 2; 3 4] @test eltype(x) == Float32 - @test copy(0.^x) == Float32[0 0; 0 0] - @test copy(1.^x) == Float32[1 1; 1 1] - @test copy(2.^x) == Float32[2 4; 8 16] - @test copy(3.^x) == Float32[3 9; 27 81] + @test copy(0 .^ x) == Float32[0 0; 0 0] + @test 
copy(1 .^ x) == Float32[1 1; 1 1] + @test copy(2 .^ x) == Float32[2 4; 8 16] + @test copy(3 .^ x) == Float32[3 9; 27 81] - @test copy(1.1.^x) ≈ 1.1.^A - @test copy(2.9.^x) ≈ 2.9.^A + @test copy(1.1 .^ x) ≈ 1.1 .^ A + @test copy(2.9 .^ x) ≈ 2.9 .^ A end - info("NDArray::power::Float32::x.^y") + @info("NDArray::power::Float32::x .^ y") let x = mx.NDArray(Float32[1 2; 3 4]), y = mx.NDArray(Float32[2 2; 2 2]) @test eltype(x) == Float32 @test eltype(y) == Float32 - @test copy(x.^y) == Float32[1 4; 9 16] - @test copy(y.^x) == Float32[2 4; 8 16] + @test copy(x .^ y) == Float32[1 4; 9 16] + @test copy(y .^ x) == Float32[2 4; 8 16] end - info("NDArray::power::e.^x::x.^e") + @info("NDArray::power::ℯ .^ x::x .^ ℯ") let x = mx.zeros(2, 3), A = [1 1 1; 1 1 1] - @test copy(e.^x) ≈ A + @test copy(ℯ .^ x) ≈ A end let A = Float32[1 2; 3 4], x = mx.NDArray(A) - @test copy(e.^x) ≈ e.^A - @test copy(x.^e) ≈ A.^e + @test copy(ℯ .^ x) ≈ ℯ .^ A + @test copy(x .^ ℯ) ≈ A .^ ℯ end - info("NDArray::power::π.^x::x.^π") + @info("NDArray::power::π .^ x::x .^ π") let A = Float32[1 2; 3 4], x = mx.NDArray(A) - @test copy(π.^x) ≈ π.^A - @test copy(x.^π) ≈ A.^π + @test copy(π .^ x) ≈ π .^ A + @test copy(x .^ π) ≈ A .^ π end # TODO: Float64: wait for https://github.com/apache/incubator-mxnet/pull/8012 - info("NDArray::broadcast_power") + @info("NDArray::broadcast_power") let A = [1 2 3; 4 5 6] @@ -932,7 +940,7 @@ end # function test_power function test_sqrt() dims = rand_dims() - info("NDArray::sqrt::dims = $dims") + @info("NDArray::sqrt::dims = $dims") j_array, nd_array = rand_tensors(dims) sqrt_ed = sqrt(nd_array) @@ -941,7 +949,7 @@ end function test_nd_as_jl() dims = (2, 3) - info("NDArray::nd_as_jl::dims = $dims") + @info("NDArray::nd_as_jl::dims = $dims") x = mx.zeros(dims) + 5 y = mx.ones(dims) @@ -952,7 +960,7 @@ function test_nd_as_jl() end z[:, 1] = y[:, 1] - y[:] = 0 + y .= 0 end @test sum(copy(y)) == 0 @@ -963,7 +971,7 @@ end function test_dot() dims1 = (2, 3) dims2 = (3, 8) - info("NDArray::dot") + @info("NDArray::dot") x = mx.zeros(dims1) y = mx.zeros(dims2) @@ -974,7 +982,7 @@ function test_dot() y = mx.zeros(1, 2, 3) @test_throws mx.MXError dot(x, y) # dimension mismatch - info("NDArray::matrix mul") + @info("NDArray::matrix mul") let A = [1. 
2 3; 4 5 6] B = [-1., -2, -3] @@ -997,7 +1005,7 @@ function test_dot() end function test_eltype() - info("NDArray::eltype") + @info("NDArray::eltype") dims1 = (3,3) x = mx.empty(dims1) @@ -1011,7 +1019,7 @@ function test_eltype() end function test_reshape() - info("NDArray::reshape") + @info("NDArray::reshape") A = rand(2, 3, 4) B = reshape(NDArray(A), 4, 3, 2) @@ -1022,7 +1030,7 @@ function test_reshape() @test size(C) == (4, 3, 2) @test copy(C)[3, 1, 1] == A[1, 2, 1] - info("NDArray::reshape::reverse") + @info("NDArray::reshape::reverse") A = mx.zeros(10, 5, 4) B = reshape(A, -1, 0) @@ -1033,7 +1041,7 @@ function test_reshape() end function test_expand_dims() - info("NDArray::expand_dims") + @info("NDArray::expand_dims") let A = [1, 2, 3, 4], x = NDArray(A) @test size(x) == (4,) @@ -1059,44 +1067,44 @@ function test_expand_dims() end # test_expand_dims function test_sum() - info("NDArray::sum") + @info("NDArray::sum") let A = reshape(1.0:8, 2, 2, 2), X = mx.NDArray(A) - @test copy(sum(X))[] == sum(A) - @test copy(sum(X, 1)) == sum(A, 1) - @test copy(sum(X, 2)) == sum(A, 2) - @test copy(sum(X, 3)) == sum(A, 3) - @test copy(sum(X, [1, 2])) == sum(A, [1, 2]) - @test copy(sum(X, (1, 2))) == sum(A, (1, 2)) + @test copy(sum(X))[] == sum(A) + @test copy(sum(X, dims = 1)) == sum(A, dims = 1) + @test copy(sum(X, dims = 2)) == sum(A, dims = 2) + @test copy(sum(X, dims = 3)) == sum(A, dims = 3) + @test copy(sum(X, dims = [1, 2])) == sum(A, dims = [1, 2]) + @test copy(sum(X, dims = (1, 2))) == sum(A, dims = (1, 2)) end end function test_mean() - info("NDArray::mean") + @info("NDArray::mean") let A = reshape(1.0:8, 2, 2, 2), X = mx.NDArray(A) - @test copy(mean(X))[] == mean(A) - @test copy(mean(X, 1)) == mean(A, 1) - @test copy(mean(X, 2)) == mean(A, 2) - @test copy(mean(X, 3)) == mean(A, 3) - @test copy(mean(X, [1, 2])) == mean(A, [1, 2]) - @test copy(mean(X, (1, 2))) == mean(A, (1, 2)) + @test copy(mean(X))[] == mean(A) + @test copy(mean(X, dims = 1)) == mean(A, dims = 1) + @test copy(mean(X, dims = 2)) == mean(A, dims = 2) + @test copy(mean(X, dims = 3)) == mean(A, dims = 3) + @test copy(mean(X, dims = [1, 2])) == mean(A, dims = [1, 2]) + @test copy(mean(X, dims = (1, 2))) == mean(A, dims = (1, 2)) end end function test_maximum() - info("NDArray::maximum") + @info("NDArray::maximum") let A = reshape(1.0:8, 2, 2, 2), X = mx.NDArray(A) - @test copy(maximum(X))[] == maximum(A) - @test copy(maximum(X, 1)) == maximum(A, 1) - @test copy(maximum(X, 2)) == maximum(A, 2) - @test copy(maximum(X, 3)) == maximum(A, 3) - @test copy(maximum(X, [1, 2])) == maximum(A, [1, 2]) - @test copy(maximum(X, (1, 2))) == maximum(A, (1, 2)) + @test copy(maximum(X))[] == maximum(A) + @test copy(maximum(X, dims = 1)) == maximum(A, dims = 1) + @test copy(maximum(X, dims = 2)) == maximum(A, dims = 2) + @test copy(maximum(X, dims = 3)) == maximum(A, dims = 3) + @test copy(maximum(X, dims = [1, 2])) == maximum(A, dims = [1, 2]) + @test copy(maximum(X, dims = (1, 2))) == maximum(A, dims = (1, 2)) end - info("NDArray::broadcast_maximum") + @info("NDArray::broadcast_maximum") let A = [1 2 3; 4 5 6] @@ -1111,18 +1119,18 @@ function test_maximum() end function test_minimum() - info("NDArray::minimum") + @info("NDArray::minimum") let A = reshape(1.0:8, 2, 2, 2), X = mx.NDArray(A) - @test copy(minimum(X))[] == minimum(A) - @test copy(minimum(X, 1)) == minimum(A, 1) - @test copy(minimum(X, 2)) == minimum(A, 2) - @test copy(minimum(X, 3)) == minimum(A, 3) - @test copy(minimum(X, [1, 2])) == minimum(A, [1, 2]) - @test copy(minimum(X, (1, 
2))) == minimum(A, (1, 2)) + @test copy(minimum(X))[] == minimum(A) + @test copy(minimum(X, dims = 1)) == minimum(A, dims = 1) + @test copy(minimum(X, dims = 2)) == minimum(A, dims = 2) + @test copy(minimum(X, dims = 3)) == minimum(A, dims = 3) + @test copy(minimum(X, dims = [1, 2])) == minimum(A, dims = [1, 2]) + @test copy(minimum(X, dims = (1, 2))) == minimum(A, dims = (1, 2)) end - info("NDArray::broadcast_minimum") + @info("NDArray::broadcast_minimum") let A = [1 2 3; 4 5 6] @@ -1137,20 +1145,20 @@ function test_minimum() end function test_prod() - info("NDArray::prod") + @info("NDArray::prod") let A = reshape(1.0:8, 2, 2, 2), X = mx.NDArray(A) - @test copy(prod(X))[] == prod(A) - @test copy(prod(X, 1)) == prod(A, 1) - @test copy(prod(X, 2)) == prod(A, 2) - @test copy(prod(X, 3)) == prod(A, 3) - @test copy(prod(X, [1, 2])) == prod(A, [1, 2]) - @test copy(prod(X, (1, 2))) == prod(A, (1, 2)) + @test copy(prod(X))[] == prod(A) + @test copy(prod(X, dims = 1)) == prod(A, dims = 1) + @test copy(prod(X, dims = 2)) == prod(A, dims = 2) + @test copy(prod(X, dims = 3)) == prod(A, dims = 3) + @test copy(prod(X, dims = [1, 2])) == prod(A, dims = [1, 2]) + @test copy(prod(X, dims = (1, 2))) == prod(A, dims = (1, 2)) end end function test_fill() - info("NDArray::fill") + @info("NDArray::fill") let x = mx.fill(42, 2, 3, 4) @test eltype(x) == Int @@ -1176,7 +1184,7 @@ function test_fill() @test copy(x) ≈ fill(Float32(42), 2, 3, 4) end - info("NDArray::fill!::arr") + @info("NDArray::fill!::arr") let x = fill!(mx.zeros(2, 3, 4), 42) @test eltype(x) == Float32 @test size(x) == (2, 3, 4) @@ -1185,19 +1193,19 @@ function test_fill() end # function test_fill function test_transpose() - info("NDArray::transpose::1D") + @info("NDArray::transpose::1D") let A = rand(Float32, 4), x = NDArray(A) @test size(x) == (4,) @test size(x') == (1, 4) end - info("NDArray::transpose::2D") + @info("NDArray::transpose::2D") let A = rand(Float32, 2, 3), x = mx.NDArray(A) @test size(x) == (2, 3) @test size(x') == (3, 2) end - info("NDArray::permutedims") + @info("NDArray::permutedims") let A = collect(Float32, reshape(1.0:24, 2, 3, 4)), x = mx.NDArray(A) A′ = permutedims(A, [2, 1, 3]) x′ = permutedims(x, [2, 1, 3]) @@ -1207,36 +1215,37 @@ function test_transpose() end function test_show() - info("NDArray::show::REPL") + @info("NDArray::show::REPL") let str = sprint(show, MIME"text/plain"(), mx.NDArray([1 2 3 4])) - @test contains(str, "1×4") - @test contains(str, "mx.NDArray") - @test contains(str, "Int64") - @test contains(str, "CPU") + @test occursin("1×4", str) + @test occursin("NDArray", str) + @test occursin("Int64", str) + @test occursin("CPU", str) @test match(r"1\s+2\s+3\s+4", str) != nothing end - info("NDArray::show") + @info("NDArray::show") let str = sprint(show, mx.NDArray([1 2 3 4])) - @test str == "NDArray [1 2 3 4]" + @test str == "NDArray([1 2 3 4])" end let str = sprint(show, mx.zeros(4)) - @test str == "NDArray Float32[0.0, 0.0, 0.0, 0.0]" + @test str == "NDArray(Float32[0.0, 0.0, 0.0, 0.0])" end end function test_size() - info("NDArray::size") + @info("NDArray::size") let A = [1 2; 3 4; 5 6], x = mx.NDArray(A) @test size(A) == size(x) - @test size(A, 1, 2, 3, 4, 5) == size(x, 1, 2, 3, 4, 5) - @inferred size(x, 1, 2, 3, 4, 5) + dims = (1, 2, 3, 4, 5) + @test map(d -> size(A, d), dims) == map(d -> size(x, d), dims) + @inferred map(d -> size(x, d), dims) end end # function test_size() function check_trigonometric(f) - info("NDArray::$f") + @info("NDArray::$f") let A = [.1 .2; .3 .4], x = mx.NDArray(A) B = 
f.(A) y = f.(x) @@ -1257,7 +1266,7 @@ function test_trigonometric() end # function test_trigonometric function check_hyperbolic(f, A) - info("NDArray::$f") + @info("NDArray::$f") let x = NDArray(A) B = f.(A) y = f.(x) @@ -1283,10 +1292,10 @@ function test_hyperbolic() end # function test_hyperbolic function test_act_funcs() - info("NDArray::σ/sigmoid") + @info("NDArray::σ/sigmoid") let A = Float32[.1, .2, -.3, -.4] - B = @. 1 / (1 + e^(-A)) + B = @. 1 / (1 + ℯ ^ (-A)) x = NDArray(A) y = σ.(x) @test copy(y) ≈ B @@ -1295,7 +1304,7 @@ function test_act_funcs() @test copy(z) ≈ B end - info("NDArray::relu") + @info("NDArray::relu") let A = [1, 2, -3, -4] B = max.(A, 0) @@ -1304,7 +1313,7 @@ function test_act_funcs() @test copy(y) ≈ B end - info("NDArray::softmax::1D") + @info("NDArray::softmax::1D") let A = Float32[1, 2, 3, 4] B = exp.(A) ./ sum(exp.(A)) @@ -1313,20 +1322,20 @@ function test_act_funcs() @test copy(y) ≈ B end - info("NDArray::softmax::2D") + @info("NDArray::softmax::2D") let A = Float32[1 2; 3 4] - B = exp.(A) ./ sum(exp.(A), 1) + B = exp.(A) ./ sum(exp.(A), dims = 1) x = NDArray(A) y = softmax.(x, 1) @test copy(y) ≈ B - C = exp.(A) ./ sum(exp.(A), 2) + C = exp.(A) ./ sum(exp.(A), dims = 2) z = softmax.(x, 2) @test copy(z) ≈ C end - info("NDArray::log_softmax::1D") + @info("NDArray::log_softmax::1D") let A = Float32[1, 2, 3, 4] B = log.(exp.(A) ./ sum(exp.(A))) @@ -1335,15 +1344,15 @@ function test_act_funcs() @test copy(y) ≈ B end - info("NDArray::log_softmax::2D") + @info("NDArray::log_softmax::2D") let A = Float32[1 2; 3 4] - B = log.(exp.(A) ./ sum(exp.(A), 1)) + B = log.(exp.(A) ./ sum(exp.(A), dims = 1)) x = NDArray(A) y = log_softmax.(x, 1) @test copy(y) ≈ B - C = log.(exp.(A) ./ sum(exp.(A), 2)) + C = log.(exp.(A) ./ sum(exp.(A), dims = 2)) z = log_softmax.(x, 2) @test copy(z) ≈ C end @@ -1369,27 +1378,27 @@ macro check_equal(op) end function test_equal() - info("NDArray::broadcast_equal") + @info("NDArray::broadcast_equal") @check_equal == - info("NDArray::broadcast_not_equal") + @info("NDArray::broadcast_not_equal") @check_equal != - info("NDArray::broadcast_greater") + @info("NDArray::broadcast_greater") @check_equal > - info("NDArray::broadcast_greater_equal") + @info("NDArray::broadcast_greater_equal") @check_equal >= - info("NDArray::broadcast_lesser") + @info("NDArray::broadcast_lesser") @check_equal < - info("NDArray::broadcast_lesser_equal") + @info("NDArray::broadcast_lesser_equal") @check_equal <= end # function test_equal function test_broadcast_to() - info("NDArray::broadcast_to") + @info("NDArray::broadcast_to") A = [1 2 3] x = NDArray(A) @test mx.broadcast_to(x, (1, 3)) |> copy == A @@ -1400,20 +1409,20 @@ function test_broadcast_to() end # function test_broadcast_to function test_broadcast_axis() - info("NDArray::broadcast_axis") + @info("NDArray::broadcast_axis") A = reshape([1, 2, 3], 1, 3, 1) x = NDArray(A) @test mx.broadcast_axis(x, 1, 4) |> copy == [A; A; A; A] - @test mx.broadcast_axis(x, 3, 2) |> copy == cat(3, A, A) + @test mx.broadcast_axis(x, 3, 2) |> copy == cat(A, A, dims = 3) - info("NDArray::broadcast_axes") + @info("NDArray::broadcast_axes") @test mx.broadcast_axes(x, 1, 4) |> copy == [A; A; A; A] - @test mx.broadcast_axes(x, 3, 2) |> copy == cat(3, A, A) + @test mx.broadcast_axes(x, 3, 2) |> copy == cat(A, A, dims = 3) end # function test_broadcast_axis function test_hypot() - info("NDArray::hypot") + @info("NDArray::hypot") A = [3 3 3] B = [4, 4] C = hypot.(A, B) @@ -1436,7 +1445,7 @@ end # function test_hypot test_slice() 
test_linear_idx() test_first() - test_endof() + test_lastindex() test_cat() test_plus() test_minus() diff --git a/julia/test/unittest/operator.jl b/julia/test/unittest/operator.jl index ed8312d91cc9..345dd0f88daf 100644 --- a/julia/test/unittest/operator.jl +++ b/julia/test/unittest/operator.jl @@ -18,14 +18,14 @@ module TestOperator using MXNet -using Base.Test +using Test using ..Main: rand_dims function test_scalar_op() data = mx.Variable(:data) shape = rand_dims() - info("Operator::scalar_op::dims = $shape") + @info "Operator::scalar_op::dims = $shape" data_jl = 5ones(Float32, shape) arr_data = mx.copy(data_jl, mx.cpu()) @@ -35,7 +35,7 @@ function test_scalar_op() exec_test = mx.bind(test, mx.cpu(), [arr_data], args_grad=[arr_grad]) mx.forward(exec_test) out = copy(exec_test.outputs[1]) - jl_out1 = (4 - ((1+data_jl+1)*2/5) - 0.2) + jl_out1 = @. 4 - ((1+data_jl+1)*2/5) - 0.2 jl_out = 2 ./ jl_out1 @test copy(out) ≈ jl_out diff --git a/julia/test/unittest/optimizer.jl b/julia/test/unittest/optimizer.jl index b068f12fffd7..cd1e7ebb4b77 100644 --- a/julia/test/unittest/optimizer.jl +++ b/julia/test/unittest/optimizer.jl @@ -17,7 +17,7 @@ module TestOptimizer -using Base.Test +using Test using MXNet using MXNet.mx.LearningRate @@ -25,7 +25,7 @@ using MXNet.mx.Momentum function test_fixed_η() - info("Optimizer::LearningRate::Fixed") + @info "Optimizer::LearningRate::Fixed" x = LearningRate.Fixed(.42) @test get(x) == .42 update!(x) @@ -34,7 +34,7 @@ end # function test_fixed_η function check_η_decay(x) - info("Optimizer::LearningRate::$x") + @info "Optimizer::LearningRate::$x" η = get(x) @test η == 1 @@ -55,14 +55,14 @@ test_inv_η() = LearningRate.Inv(1) |> check_η_decay function test_μ_null() - info("Optimizer::Momentum::Null") + @info "Optimizer::Momentum::Null" x = Momentum.Null() @test iszero(get(x)) end function test_μ_fixed() - info("Optimizer::Momentum::Fixed") + @info "Optimizer::Momentum::Fixed" x = Momentum.Fixed(42) @test get(x) == 42 end diff --git a/julia/test/unittest/random.jl b/julia/test/unittest/random.jl index 973a4bc32faa..013e4f609daa 100644 --- a/julia/test/unittest/random.jl +++ b/julia/test/unittest/random.jl @@ -17,18 +17,19 @@ module TestRandom using MXNet -using Base.Test +using Test +using Statistics function test_uniform() dims = (100, 100, 2) - info("random::uniform::dims = $dims") + @info "random::uniform::dims = $dims" low = -10; high = 10 seed = 123 - mx.srand(seed) + mx.seed!(seed) ret1 = mx.rand(dims..., low = low, high = high) - mx.srand(seed) + mx.seed!(seed) ret2 = mx.empty(dims) mx.rand!(ret2, low = low, high = high) @@ -38,14 +39,14 @@ end function test_gaussian() dims = (80, 80, 4) - info("random::gaussian::dims = $dims") + @info "random::gaussian::dims = $dims" μ = 10; σ = 2 seed = 456 - mx.srand(seed) + mx.seed!(seed) ret1 = mx.randn(dims..., μ = μ, σ = σ) - mx.srand(seed) + mx.seed!(seed) ret2 = mx.empty(dims) mx.randn!(ret2, μ = μ, σ = σ) diff --git a/julia/test/unittest/symbolic-node.jl b/julia/test/unittest/symbolic-node.jl index 507af17332f8..07ef05f704db 100644 --- a/julia/test/unittest/symbolic-node.jl +++ b/julia/test/unittest/symbolic-node.jl @@ -18,7 +18,7 @@ module TestSymbolicNode using MXNet -using Base.Test +using Test using ..Main: mlp2, mlpchain, exec @@ -26,7 +26,7 @@ using ..Main: mlp2, mlpchain, exec # Test Implementations ################################################################################ function test_basic() - info("SymbolicNode::basic") + @info("SymbolicNode::basic") model = mlp2() @test mx.list_arguments(model) == 
[:data,:fc1_weight,:fc1_bias,:fc2_weight,:fc2_bias] @@ -35,7 +35,7 @@ function test_basic() end function test_chain() - info("SymbolicNode::chain") + @info("SymbolicNode::chain") model = mlpchain() @test mx.list_arguments(model) == [:data,:fc1_weight,:fc1_bias,:fc2_weight,:fc2_bias] @@ -57,7 +57,7 @@ function test_chain() end function test_internal() - info("SymbolicNode::internal") + @info("SymbolicNode::internal") data = mx.Variable(:data) oldfc = mx.FullyConnected(data, name=:fc1, num_hidden=10) @@ -71,7 +71,7 @@ function test_internal() end function test_get_children() - info("SymbolicNode::get_children") + @info("SymbolicNode::get_children") let x = mx.Variable(:x), y = mx.Variable(:y) z = x + y @@ -80,7 +80,7 @@ function test_get_children() @test mx.list_outputs(mx.get_children(z)) == [:x, :y] end - info("SymbolicNode::get_children::on leaf") + @info("SymbolicNode::get_children::on leaf") let x = mx.Variable(:x) @test mx.get_children(x) == nothing end @@ -88,7 +88,7 @@ end # test_get_children function test_compose() - info("SymbolicNode::compose") + @info("SymbolicNode::compose") data = mx.Variable(:data) net1 = mx.FullyConnected(data, name=:fc1, num_hidden=10) @@ -104,7 +104,7 @@ function test_compose() end function test_infer_shape() - info("SymbolicNode::infer_shape::mlp2") + @info("SymbolicNode::infer_shape::mlp2") model = mlp2() data_shape = (100, 100) @@ -118,7 +118,7 @@ function test_infer_shape() end function test_infer_shape_error() - info("SymbolicNode::infer_shape::throws") + @info("SymbolicNode::infer_shape::throws") model = mlp2() weight_shape = (100, 1) @@ -127,7 +127,7 @@ function test_infer_shape_error() end function test_saveload() - info("SymbolicNode::saveload::mlp2") + @info("SymbolicNode::saveload::mlp2") model = mlp2() fname = tempname() @@ -139,37 +139,37 @@ function test_saveload() end function test_attrs() - info("SymbolicNode::Attributes") + @info("SymbolicNode::Attributes") data = mx.Variable(:data) @test mx.get_name(data) == :data result = mx.get_attr(data, :test) - @test isnull(result) + @test ismissing(result) mx.set_attr(data, :test, "1.0") result = mx.get_attr(data, :test) - @test !isnull(result) - @test get(result) == "1.0" + @test !ismissing(result) + @test result == "1.0" data2 = mx.Variable(:data2, attrs = Dict(:test => "hallo!")) - @test get(mx.get_attr(data2, :test)) == "hallo!" + @test mx.get_attr(data2, :test) == "hallo!" 
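
[Illustrative note, not part of the patch] The attribute assertions in `test_attrs` change because `Nullable` is gone in Julia 0.7/1.0: `get_attr` now returns either the attribute string or `missing`, so callers drop `isnull`/`get` in favour of `ismissing` and plain use of the value. A sketch of the calling pattern, assuming the behaviour exercised by these tests:

    data = mx.Variable(:data, attrs = Dict(:test => "1.0"))

    a = mx.get_attr(data, :test)     # "1.0" -- the value itself, no unwrapping
    b = mx.get_attr(data, :other)    # missing -- attribute was never set

    ismissing(b)                     # true
    val = ismissing(a) ? "default" : a
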
conv = mx.Convolution(data2, kernel = (1,1), num_filter = 1) - @test isnull(mx.get_attr(conv, :b)) - @test isa(mx.get_name(conv), Symbol) + @test ismissing(mx.get_attr(conv, :b)) + @test mx.get_name(conv) isa Symbol @test_throws MethodError mx.Variable(:data3, attrs = Dict(:test => "1.0", :test2 => 1.0)) @test_throws MethodError mx.Convolution(data2, kernel = (1,1), num_filter = 1, attrs = Dict(:test => "1.0", :test2 => 1.0)) end function test_functions() - info("SymbolicNode::Functions") + @info("SymbolicNode::Functions") data = mx.Variable(:data) typeof(mx.sum(data)) == mx.SymbolicNode end function test_reshape() - info("SymbolicNode::reshape(sym, dim...)") + @info("SymbolicNode::reshape(sym, dim...)") A = mx.NDArray(collect(1:24)) x = mx.Variable(:x) @@ -181,7 +181,7 @@ function test_reshape() @test size(out) == (2, 3, 4) @test copy(out) == reshape(1:24, 2, 3, 4) - info("SymbolicNode::reshape(sym, dim)") + @info("SymbolicNode::reshape(sym, dim)") A = mx.NDArray(collect(1:24)) x = mx.Variable(:x) @@ -193,18 +193,18 @@ function test_reshape() @test size(out) == (2, 3, 4) @test copy(out) == reshape(1:24, 2, 3, 4) - info("SymbolicNode::reshape::reverse") + @info("SymbolicNode::reshape::reverse") A = mx.zeros(10, 5, 4) x = mx.Variable(:x) - y = mx.reshape(x, -1, 0, reverse=true) + y = mx.reshape(x, -1, 0, reverse = true) e = mx.bind(y, mx.cpu(), Dict(:x => A)) mx.forward(e) out = e.outputs[1] @test size(out) == (50, 4) - info("SymbolicNode::reshape::0") + @info("SymbolicNode::reshape::0") A = mx.zeros(2, 3, 4) x = mx.Variable(:x) @@ -215,7 +215,7 @@ function test_reshape() @test size(out) == (4, 3, 2) - info("SymbolicNode::reshape::-1") + @info("SymbolicNode::reshape::-1") A = mx.zeros(2, 3, 4) x = mx.Variable(:x) @@ -226,7 +226,7 @@ function test_reshape() @test size(out) == (6, 1, 4) - info("SymbolicNode::reshape::-2") + @info("SymbolicNode::reshape::-2") A = mx.zeros(2, 3, 4, 2) x = mx.Variable(:x) @@ -237,7 +237,7 @@ function test_reshape() @test size(out) == (3, 2, 4, 2) - info("SymbolicNode::reshape::-3") + @info("SymbolicNode::reshape::-3") A = mx.zeros(2, 3, 4, 5) x = mx.Variable(:x) @@ -248,7 +248,7 @@ function test_reshape() @test size(out) == (6, 20) - info("SymbolicNode::reshape::-4") + @info("SymbolicNode::reshape::-4") A = mx.zeros(2, 3, 4) x = mx.Variable(:x) @@ -261,12 +261,12 @@ function test_reshape() end function test_dot() - info("SymbolicNode::dot") + @info("SymbolicNode::dot") x = mx.Variable(:x) y = mx.Variable(:y) z = mx.dot(x, y) - z_exec = mx.bind(z, context=mx.cpu(), - args=Dict(:x => mx.ones((100, 2)), :y => mx.ones((2, 200)))) + z_exec = mx.bind(z, context = mx.cpu(), + args = Dict(:x => mx.ones((100, 2)), :y => mx.ones((2, 200)))) mx.forward(z_exec) ret = copy(z_exec.outputs[1]) @@ -275,14 +275,14 @@ function test_dot() end function test_print() - info("SymbolicNode::print") + @info("SymbolicNode::print") io = IOBuffer() print(io, mx.Variable(:x)) @test !isempty(String(take!(io))) end function test_misc() - info("SymbolicNode::Miscellaneous") + @info("SymbolicNode::Miscellaneous") # Test for #189 a = mx.Variable("a") b = mx.Variable("b") @@ -290,7 +290,7 @@ function test_misc() end function test_add() - info("SymbolicNode::elementwise add") + @info("SymbolicNode::elementwise add") let x = mx.Variable(:x), A = Float32[1 2; 3 4] let y = exec(x .+ 42; :x => A)[] @test size(y) == size(A) @@ -329,7 +329,7 @@ function test_add() end # function test_add function test_minus() - info("SymbolicNode::elementwise minus") + @info("SymbolicNode::elementwise minus") let x = 
mx.Variable(:x), A = Float32[1 2; 3 4] let y = exec(x .- 42; :x => A)[] @test size(y) == size(A) @@ -373,7 +373,7 @@ function test_minus() end # function test_minus function test_mul() - info("SymbolicNode::elementwise mul") + @info("SymbolicNode::elementwise mul") let x = mx.Variable(:x), A = Float32[1 2; 3 4] let y = exec(x .* 42; :x => A)[] @test size(y) == size(A) @@ -412,7 +412,7 @@ function test_mul() end # function test_mul function test_div() - info("SymbolicNode::elementwise div") + @info("SymbolicNode::elementwise div") let x = mx.Variable(:x), A = Float32[1 2; 3 4] let y = exec(x ./ 42; :x => A)[] @test size(y) == size(A) @@ -451,16 +451,16 @@ function test_div() end # function test_div function test_power() - info("SymbolicNode::elementwise power") + @info("SymbolicNode::elementwise power") let x = mx.Variable(:x), A = Float32[1 2; 3 4] - let y = exec(x.^42; :x => A)[] + let y = exec(x .^ 42; :x => A)[] @test size(y) == size(A) - @test copy(y) ≈ A.^42 + @test copy(y) ≈ A .^ 42 end - let y = exec(42.^x; :x => A)[] + let y = exec(42 .^ x; :x => A)[] @test size(y) == size(A) - @test copy(y) ≈ 42.^A + @test copy(y) ≈ 42 .^ A end end @@ -468,61 +468,61 @@ function test_power() x = mx.Variable(:x) y = mx.Variable(:y) - let z = x.^y + let z = x .^ y z = exec(z; :x => A, :y => B)[] @test size(z) == size(A) - @test copy(z) ≈ A.^B + @test copy(z) ≈ A .^ B end - let z = y.^x + let z = y .^ x z = exec(z; :x => A, :y => B)[] @test size(z) == size(A) - @test copy(z) ≈ B.^A + @test copy(z) ≈ B .^ A end end - info("SymbolicNode::power::e.^x::x.^e") + @info("SymbolicNode::power::e .^ x::x .^ e") let x = mx.Variable(:x), A = [0 0 0; 0 0 0] - y = exec(e.^x; :x => A)[] - @test copy(y) ≈ ones(A) + y = exec(ℯ .^ x; :x => A)[] + @test copy(y) ≈ fill(1, size(A)) end let x = mx.Variable(:x), A = Float32[1 2; 3 4] - let y = e.^x + let y = ℯ .^ x z = exec(y; :x => A)[] - @test copy(z) ≈ e.^A + @test copy(z) ≈ ℯ .^ A end - let y = x.^e + let y = x .^ ℯ z = exec(y; :x => A)[] - @test copy(z) ≈ A.^e + @test copy(z) ≈ A .^ ℯ end end - info("SymbolicNode::power::π.^x::x.^π") + @info("SymbolicNode::power::π .^ x::x .^ π") let x = mx.Variable(:x), A = Float32[1 2; 3 4] - let y = π.^x + let y = π .^ x z = exec(y; :x => A)[] - @test copy(z) ≈ π.^A + @test copy(z) ≈ π .^ A end - let y = x.^π + let y = x .^ π z = exec(y; :x => A)[] - @test copy(z) ≈ A.^π + @test copy(z) ≈ A .^ π end end end # function test_power function test_get_name() - info("SymbolicNode::get_name::with get_internals") + @info("SymbolicNode::get_name::with get_internals") name = mx.get_name(mx.get_internals(mlp2())) # no error - @test contains(name, "Ptr") + @test occursin("Ptr", name) end # function test_get_name function test_var() - info("SymbolicNode::var") + @info("SymbolicNode::var") x = @mx.var x @test x isa mx.SymbolicNode diff --git a/julia/test/unittest/util.jl b/julia/test/unittest/util.jl index ddd613ca48ea..d7f65a3e8012 100644 --- a/julia/test/unittest/util.jl +++ b/julia/test/unittest/util.jl @@ -17,19 +17,18 @@ module TestUtil -using Base.Test - using MXNet +using Test function test_getdocdefine() - info("Util::_getdocdefine") - @test contains(mx._getdocdefine("sgd_update"), "Defined in") + @info("Util::_getdocdefine") + @test occursin("Defined in", mx._getdocdefine("sgd_update")) end # function test_getdocdefine function test_firstarg() - info("Util::_firstarg") + @info("Util::_firstarg") @test mx._firstarg(:(f(x, y))) == :x @test mx._firstarg(:(f(x::mx.NDArray, y))) == :x @test mx._firstarg(:(f(x::mx.NDArray, y::mx.NDArray))) == 
:x diff --git a/julia/test/unittest/visualize.jl b/julia/test/unittest/visualize.jl index 58d111b0fe14..a5a4f722e6e2 100644 --- a/julia/test/unittest/visualize.jl +++ b/julia/test/unittest/visualize.jl @@ -17,7 +17,7 @@ module TestVisualize using MXNet -using Base.Test +using Test using ..Main: mlp2 @@ -26,7 +26,7 @@ using ..Main: mlp2 ################################################################################ function test_basic() - info("Visualize::basic") + @info("Visualize::basic") mlp = mlp2()
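
[Illustrative note, not part of the patch] The `contains`/`ismatch` to `occursin` edits scattered through these test files follow the Julia 0.7/1.0 rename, which for `contains` also flips the argument order so the needle or pattern comes first. The string below only mimics the shape of a libmxnet docstring line:

    s = "Defined in src/operator/optimizer_op.cc:L68"

    occursin("Defined in", s)     # replaces contains(s, "Defined in")
    occursin(r"\.cc:L\d+$", s)    # replaces ismatch(r"\.cc:L\d+$", s)
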