Upgrade archive utility and add back FC improvement #15171

Merged: 10 commits merged on Jun 17, 2019
22 changes: 22 additions & 0 deletions Makefile
@@ -368,10 +368,32 @@ endif

# Guard against displaying nvcc info messages to users not using CUDA.
ifeq ($(USE_CUDA), 1)
# Get the installed ar version, compare it with the expected version, and find the larger and smaller of the two
AR_VERSION := $(shell ar --version | egrep -o "([0-9]{1,}\.)+[0-9]{1,}")
EXPECTED_AR_VERSION := $(shell echo "2.27")
LARGE_VERSION := $(shell printf '%s\n' "$(AR_VERSION)" "$(EXPECTED_AR_VERSION)" | sort -V | tail -n 1)
SMALL_VERSION := $(shell printf '%s\n' "$(AR_VERSION)" "$(EXPECTED_AR_VERSION)" | sort -V | head -n 1)

# If NVCC is not at the location specified, use CUDA_PATH instead.
ifeq ("$(wildcard $(NVCC))","")
ifneq ($(USE_CUDA_PATH), NONE)
NVCC=$(USE_CUDA_PATH)/bin/nvcc

# If the larger version is the expected one and larger != smaller,
# the installed ar is older than the expected version and the user needs to be warned
ifeq ($(LARGE_VERSION), $(EXPECTED_AR_VERSION))
ifneq ($(LARGE_VERSION), $(SMALL_VERSION))
define n


endef

$(warning WARNING: Archive utility: ar version being used is less than 2.27.0. $n \
Note that with USE_CUDA=1 flag and USE_CUDNN=1 this is known to cause problems. $n \
For more info see: https://github.com/apache/incubator-mxnet/issues/15084)
$(shell sleep 5)
endif
endif
Contributor:

What about the corresponding modification to CMake?

Member Author:

We don't yet need it for CMake; we have encountered this issue only with make.

Contributor:

That's weird. Thanks for your answer. I would expect a similar amount of object code from both builds.

$(info INFO: nvcc was not found on your path)
$(info INFO: Using $(NVCC) as nvcc path)
else
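
For readers unfamiliar with the `sort -V` comparison used in the Makefile hunk above: both version strings are printed and sorted with version-aware ordering, and the warning fires when the expected version sorts last while the two differ, i.e. when the installed ar is older than 2.27. Below is a minimal Python sketch of that same comparison; it is illustrative only and not part of the PR.

```python
# Illustrative sketch of the Makefile's ar-version check, not code from the PR.
def ar_is_too_old(ar_version: str, expected: str = "2.27") -> bool:
    # Compare numerically per component, mirroring GNU `sort -V`
    # rather than plain lexicographic string order.
    as_tuple = lambda v: tuple(int(part) for part in v.split("."))
    return as_tuple(ar_version) < as_tuple(expected)

print(ar_is_too_old("2.26.1"))  # True  -> the Makefile would print the warning
print(ar_is_too_old("2.30"))    # False -> no warning
```
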
2 changes: 2 additions & 0 deletions ci/docker/Dockerfile.build.ubuntu_build_cuda
@@ -43,6 +43,8 @@ COPY install/ubuntu_clang.sh /work/
RUN /work/ubuntu_clang.sh
COPY install/ubuntu_mklml.sh /work/
RUN /work/ubuntu_mklml.sh
COPY install/ubuntu_ar.sh /work/
RUN /work/ubuntu_ar.sh

ENV CUDNN_VERSION=7.5.1.10
COPY install/ubuntu_cudnn.sh /work/
38 changes: 38 additions & 0 deletions ci/docker/install/ubuntu_ar.sh
@@ -0,0 +1,38 @@
#!/usr/bin/env bash

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# build and install are separated so changes to build don't invalidate
# the whole docker cache for the image

set -ex

export DEBIAN_FRONTEND=noninteractive
apt-get update || true
apt-get install -y \
    wget

mkdir /opt/binutils_install && mkdir /opt/binutils && cd /opt/binutils
wget -nv https://mirror.clarkson.edu/gnu/binutils/binutils-2.27.tar.gz
tar -xvf binutils-2.27.tar.gz && cd binutils-2.27
./configure --prefix=/opt/binutils_other --exec-prefix=/opt/binutils_install
make -j$(nproc)
make install
ln -s /opt/binutils_install/bin/ar /usr/local/bin/ar
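
A quick way to confirm the container now picks up the freshly built ar is sketched below. This is a hypothetical check, not part of the PR, and it assumes `/usr/local/bin` precedes `/usr/bin` on `PATH`.

```python
# Hypothetical post-install check, not part of the PR: verify that the `ar`
# found on PATH is the freshly built binutils 2.27 rather than the distro one.
import shutil
import subprocess

ar_path = shutil.which("ar")
version_line = subprocess.run(
    ["ar", "--version"], capture_output=True, text=True, check=True
).stdout.splitlines()[0]

print(ar_path)       # expected: /usr/local/bin/ar (the symlink created above)
print(version_line)  # expected to mention 2.27
```
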
2 changes: 2 additions & 0 deletions docs/install/build_from_source.md
@@ -180,6 +180,8 @@ More information on turning these features on or off are found in the following
There is a configuration file for make,
[`make/config.mk`](https://github.com/apache/incubator-mxnet/blob/master/make/config.mk), that contains all the compilation options. You can edit it and then run `make` or `cmake`. `cmake` is recommended for building MXNet (and is required to build with MKLDNN), however you may use `make` instead. For building with Java/Scala/Clojure, only `make` is supported.

**NOTE:** With certain sets of build flags, the MXNet static archive grows to more than 4 GB. Because MXNet uses the ar archive utility internally, such builds hit a known ar bug ("File Truncated": [bug report](https://sourceware.org/bugzilla/show_bug.cgi?id=14625)) for archives larger than 4 GB. Please use ar version 2.27 or later to avoid this bug. See https://github.com/apache/incubator-mxnet/issues/15084 for more details.
Contributor:

Can we explicitly list the flags that cause such bloat, so that not all users building from source need to worry about a newer archive utility?

Member:

The most significant part comes from the GPU code, and that is already an explicit switch.

Member Author:

Yes, the most significant part comes from the GPU code. If I list the set of flags that fail today, there is no guarantee that the archive would not bloat tomorrow and fail for a different set of flags (one that includes USE_CUDA=1 and USE_CUDNN=1), and then these instructions could quickly become outdated.

Contributor:

Is there a way to detect this case, so users are not faced with a strange error message but are specifically prompted to upgrade ar? I'm afraid that users might not see this specific part of our documentation and just get frustrated. If our build pipeline automatically notified them (ideally ahead of time, or right after the error occurred), that would benefit the user experience.

Member Author:

@marcoabreu I have added a warning message for users if the ar version is lower than 2.27.


<hr>

## Build MXNet
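
As a rough illustration of the documentation note above, one can check after a CUDA build whether the static library has crossed the 4 GB boundary at which pre-2.27 ar truncates archive members. This is a sketch only; `lib/libmxnet.a` is an assumed build output path, and the size is only meaningful if the archive was written successfully.

```python
# Sketch only: check whether the built static archive exceeds the 4 GB limit
# that triggers the "File Truncated" bug in ar versions older than 2.27.
# The path below is an assumed build output location; adjust as needed.
import os

archive = "lib/libmxnet.a"
size_gib = os.path.getsize(archive) / 2**30
flag = "-> requires ar >= 2.27" if size_gib > 4 else "-> fine with older ar"
print(f"{archive}: {size_gib:.2f} GiB {flag}")
```
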
14 changes: 13 additions & 1 deletion src/operator/nn/fully_connected-inl.h
@@ -36,6 +36,7 @@
#include "../elemwise_op_common.h"
#include "../linalg.h"
#include "../../common/utils.h"
#include "../tensor/broadcast_reduce_op.h"

namespace mxnet {
namespace op {
@@ -169,7 +170,18 @@ void FCBackward(const OpContext &ctx, const FullyConnectedParam &param,
// gradient of bias
if (!param.no_bias) {
Tensor<xpu, 1, DType> gbias = in_grad[fullc::kBias].get<xpu, 1, DType>(s);
Assign(gbias, req[fullc::kBias], sum_rows(grad));
TBlob grad_blob = TBlob(grad);
TBlob gbias_blob = TBlob(gbias);
mxnet::TShape x(1, 0);
mxnet::TShape small;
if (shape_assign(&gbias_blob.shape_, Shape2(param.num_hidden, 1))) {
small = gbias_blob.shape_;
} else {
small = ReduceAxesShapeImpl(grad_blob.shape_, dmlc::optional<mxnet::TShape>(x), true, false);
}
ReduceAxesComputeImpl<xpu, mshadow::red::sum, false, false,
mshadow_op::identity>(ctx, {grad_blob}, {req[fullc::kBias]},
{in_grad[fullc::kBias]}, small);
}
// gradient of data
// Legacy approach shown here for comparison:
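
For context on the operator change above: the bias gradient of a fully connected layer is the output gradient summed over the batch axis, which is what the `ReduceAxesComputeImpl` call computes in place of the old `sum_rows`. A NumPy sketch of the quantity being computed (illustrative only, not the operator code):

```python
# Illustrative only: the reduction the bias gradient needs, expressed in NumPy.
import numpy as np

batch, num_hidden = 5, 10
grad = np.random.rand(batch, num_hidden).astype(np.float32)  # dL/dY from upstream
gbias = grad.sum(axis=0)                                      # dL/db, shape (num_hidden,)
assert gbias.shape == (num_hidden,)
print(gbias[:3])
```
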
2 changes: 0 additions & 2 deletions src/operator/nn/fully_connected.cc
@@ -316,11 +316,9 @@ NNVM_REGISTER_OP(_backward_FullyConnected)
const FullyConnectedParam& params = nnvm::get<FullyConnectedParam>(attrs.parsed);
return params.no_bias ? 2 : 3;
})
#if MXNET_USE_MKLDNN == 1
.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& n) {
return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
})
#endif
.set_attr<nnvm::TIsBackward>("TIsBackward", true)
.set_attr<nnvm::FInplaceOption>("FInplaceOption", [](const NodeAttrs& attrs){
return std::vector<std::pair<int, int> >{{1, 0}};
21 changes: 21 additions & 0 deletions tests/python/unittest/test_operator.py
@@ -696,6 +696,27 @@ def test_symbol_pow():
check_symbolic_backward(test, [data_tmp, exp_tmp], [np.ones(shape)], [data_dir, exp_dir])


@with_seed()
def test_fully_connected():
data = mx.sym.var("data")
fc_weight = mx.sym.var("weight")
fc_bias = mx.sym.var("bias")
fc = mx.sym.FullyConnected(data=data, weight=fc_weight, bias=fc_bias, num_hidden=10, no_bias=False, name='fc')
data = mx.nd.random.uniform(shape=(5, 5, 5, 13), dtype=np.float32)
fc_weight = mx.nd.random.uniform(shape=(10, 325), dtype=np.float32)
fc_bias = mx.nd.random.uniform(shape=(10), dtype=np.float32)
fc_bias2 = mx.nd.random.uniform(shape=(10, 1), dtype=np.float32)
data_np = data.asnumpy().reshape(5, 325)
fc_weight_np = np.transpose(fc_weight.asnumpy())
fc_bias_np = fc_bias.asnumpy()
res = np.dot(data_np, fc_weight_np) + fc_bias.asnumpy()
check_symbolic_forward(fc, {'data': data_np, 'weight': fc_weight.asnumpy(), 'bias': fc_bias_np}, {'fc_output': res})
check_numeric_gradient(fc, {'data': data_np, 'weight': fc_weight.asnumpy(), 'bias': fc_bias_np},
numeric_eps=1e-2, rtol=1e-4, atol=1e-2)
# TODO: Fix Bug #15032 when bias has ndim > 1
#check_symbolic_forward(fc, {'data': data_np, 'weight': fc_weight.asnumpy(), 'bias': fc_bias2.asnumpy()}, {'fc_output': res})


@with_seed()
def test_pow_fn():
shape = (3, 4)