This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

add a compiler flag to use int64 as tensor size #14570

Merged on Apr 23, 2019 (37 commits). The file changes shown below are from 6 of those commits.

Commits:
41351f3 use a compile flag to use int64 tensor size (apeforest, Mar 29, 2019)
e9bd3cc use personal mshadow repo (apeforest, Mar 29, 2019)
d8d21ed Merge remote-tracking branch 'upstream/master' into perf/large-tensor (apeforest, Apr 2, 2019)
caf8e7f update data type (apeforest, Apr 2, 2019)
0ea2cbc update make config (apeforest, Apr 2, 2019)
3a3c02f change size_t to index_t and add documentation (apeforest, Apr 9, 2019)
b1ca6dd update mshadow submodule to master (apeforest, Apr 15, 2019)
5443fd5 fix compilation warning (apeforest, Apr 15, 2019)
872255f fix compiler warning (apeforest, Apr 15, 2019)
4bd1805 fix compiler warning (apeforest, Apr 15, 2019)
08e9b10 fix compiler warning (apeforest, Apr 15, 2019)
3a4661a Merge remote-tracking branch 'upstream/master' into perf/large-tensor (apeforest, Apr 15, 2019)
d3d6cc6 fix compiler warning (apeforest, Apr 15, 2019)
7e3ed63 fix compiler error (apeforest, Apr 15, 2019)
54735db change nnvm::Tuple to mxnet::Tuple (apeforest, Apr 16, 2019)
5fd9ad1 Merge remote-tracking branch 'upstream/master' into perf/large-tensor (apeforest, Apr 16, 2019)
0758d0c fix compiler warning (apeforest, Apr 16, 2019)
a503ec5 fix compiler warning (apeforest, Apr 16, 2019)
cd9aa53 fix compiler warning (apeforest, Apr 16, 2019)
12559b1 fix compiler warning (apeforest, Apr 16, 2019)
a4e4a0c fix compiler warning (apeforest, Apr 16, 2019)
2399864 fix lint (apeforest, Apr 17, 2019)
334d775 update CI runtime_functons (apeforest, Apr 17, 2019)
826613a update runtime function (apeforest, Apr 17, 2019)
4412b90 correct runtime_functions (apeforest, Apr 17, 2019)
1047eb5 udpate runtime functions (apeforest, Apr 17, 2019)
97a1c08 add nightly test for large tensor (apeforest, Apr 17, 2019)
861b95e update Jenkins files to test new compiler flag (apeforest, Apr 17, 2019)
5054f8d Merge remote-tracking branch 'upstream/master' into perf/large-tensor (apeforest, Apr 17, 2019)
935389d Merge remote-tracking branch 'upstream/master' into perf/large-tensor (apeforest, Apr 18, 2019)
b86e630 fix CI (apeforest, Apr 18, 2019)
f7540d1 Merge remote-tracking branch 'upstream/master' into perf/large-tensor (apeforest, Apr 18, 2019)
d8b04b3 add runtime feature detect for the compiler flag (apeforest, Apr 19, 2019)
20221d6 change build from make to cmake (apeforest, Apr 19, 2019)
bc95113 fix CI (apeforest, Apr 19, 2019)
9c672b7 move tests to nightly (apeforest, Apr 20, 2019)
27584ea Merge remote-tracking branch 'upstream/master' into perf/large-tensor (apeforest, Apr 20, 2019)
Files changed:
2 changes: 1 addition & 1 deletion .gitmodules
@@ -1,6 +1,6 @@
 [submodule "3rdparty/mshadow"]
 	path = 3rdparty/mshadow
-	url = https://github.com/dmlc/mshadow.git
+	url = https://github.com/apeforest/mshadow.git
 [submodule "3rdparty/dmlc-core"]
 	path = 3rdparty/dmlc-core
 	url = https://github.com/dmlc/dmlc-core.git
2 changes: 1 addition & 1 deletion 3rdparty/mshadow (submodule commit pointer updated; contents not shown)
8 changes: 8 additions & 0 deletions CMakeLists.txt
@@ -50,6 +50,7 @@ mxnet_option(USE_SIGNAL_HANDLER "Print stack traces on segfaults." OFF)
 mxnet_option(USE_TENSORRT "Enable inference optimization with TensorRT." OFF)
 mxnet_option(USE_ASAN "Enable Clang/GCC ASAN sanitizers." OFF)
 mxnet_option(ENABLE_TESTCOVERAGE "Enable compilation with test coverage metric output" OFF)
+mxnet_option(USE_INT64_TENSOR_SIZE "Use int64_t to represent the total number of elements in a tensor" OFF)
 
 message(STATUS "CMAKE_CROSSCOMPILING ${CMAKE_CROSSCOMPILING}")
 message(STATUS "CMAKE_HOST_SYSTEM_PROCESSOR ${CMAKE_HOST_SYSTEM_PROCESSOR}")
@@ -295,6 +296,13 @@ else()
   add_definitions(-DMXNET_USE_NCCL=0)
 endif()
 
+if (USE_INT64_TENSOR_SIZE)
+  message(STATUS "Using 64-bit integer for tensor size")
+  add_definitions(-DMSHADOW_INT64_TENSOR_SIZE=1)
+else()
+  add_definitions(-DMSHADOW_INT64_TENSOR_SIZE=0)
+endif()
+
 include(cmake/ChooseBlas.cmake)
 if(USE_CUDA AND FIRST_CUDA)
   include(3rdparty/mshadow/cmake/Utils.cmake)
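For context (not part of the diff): a CMake build opts in with cmake -DUSE_INT64_TENSOR_SIZE=ON, and the option only toggles the MSHADOW_INT64_TENSOR_SIZE preprocessor define. A minimal sketch of the downstream effect, assuming mshadow derives its index type from this macro (the real typedef lives in 3rdparty/mshadow, not in this PR):

    // Sketch of what -DMSHADOW_INT64_TENSOR_SIZE=... selects downstream.
    // Assumption: mshadow picks the width of index_t from this macro.
    #include <cstdint>

    #if MSHADOW_INT64_TENSOR_SIZE == 1
    typedef int64_t index_t;   // tensors may exceed 2^31 - 1 elements
    #else
    typedef int32_t index_t;   // default: 32-bit sizes
    #endif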
5 changes: 5 additions & 0 deletions Makefile
@@ -188,6 +188,11 @@ ifeq ($(USE_OPERATOR_TUNING), 1)
 	CFLAGS += -DMXNET_USE_OPERATOR_TUNING=1
 endif
 
+ifeq ($(USE_INT64_TENSOR_SIZE), 1)
+	CFLAGS += -DMSHADOW_INT64_TENSOR_SIZE=1
+else
+	CFLAGS += -DMSHADOW_INT64_TENSOR_SIZE=0
+endif
 # verify existence of separate lapack library when using blas/openblas/atlas
 # switch off lapack support in case it can't be found
 # issue covered with this
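The make build threads the same define through CFLAGS, so make USE_INT64_TENSOR_SIZE=1 selects the 64-bit mode. A hypothetical sanity check (not part of the PR) that reports which mode a translation unit was compiled in:

    // Hypothetical check of the MSHADOW_INT64_TENSOR_SIZE flag set in CFLAGS.
    #include <cstdio>

    int main() {
    #if MSHADOW_INT64_TENSOR_SIZE == 1
      std::printf("tensor size type: int64_t\n");
    #else
      std::printf("tensor size type: int32_t\n");
    #endif
      return 0;
    }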
13 changes: 7 additions & 6 deletions include/mxnet/tensor_blob.h
@@ -219,15 +219,16 @@ class TBlob {
     return shape_.ndim();
   }
   /*!
-   * \brief return size of i-th dimension, start counting from highest dimension
+   * \brief return size of i-th dimension, start counting from highest dimension.
+   * return type needs to be a signed integer.
    * \param idx the dimension count from the highest dimension
-   * \return the size
+   * \return the size. -1 means unknown size to support zero-size tensor.
    */
   inline index_t size(index_t idx) const {
     return shape_[idx];
   }
   /*! \brief total number of elements in the tensor */
-  inline index_t Size(void) const {
+  inline size_t Size(void) const {
     return shape_.Size();
   }
   /*! \brief get pointer in dtype */
@@ -442,7 +443,7 @@ class FieldEntry<mxnet::TShape>
       throw dmlc::ParamError(os.str());
     }
     if (enforce_nonzero_) {
-      for (mxnet::index_t i = 0; i < v.ndim(); ++i) {
+      for (int i = 0; i < v.ndim(); ++i) {
         if (v[i] == 0U) {
           std::ostringstream os;
           os << "value " << v << "for Parameter " << this->key_
@@ -456,7 +457,7 @@
     this->enforce_nonzero_ = true;
     return this->self();
   }
-  inline FieldEntry<mxnet::TShape> &set_expect_ndim(mxnet::index_t ndim) {
+  inline FieldEntry<mxnet::TShape> &set_expect_ndim(int ndim) {
     expect_ndim_ = ndim;
     return this->self();
   }
@@ -465,7 +466,7 @@
   // whether all the entries need to be nonzero
   bool enforce_nonzero_;
   // expected number of dimension, default = 0 means no restriction.
-  mxnet::index_t expect_ndim_;
+  int expect_ndim_;
 };
 
 }  // namespace parameter
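The asymmetry above is deliberate: per-dimension sizes stay signed (index_t, with -1 reserved for an unknown dimension) while the total element count becomes an unsigned size_t. A standalone sketch of that contract, using a hypothetical TotalSize helper rather than TBlob itself:

    // Sketch: signed per-dimension sizes, unsigned running total,
    // mirroring the size()/Size() split in TBlob above.
    #include <cstddef>
    #include <cstdint>

    typedef int64_t index_t;  // assumption: the large-tensor build

    size_t TotalSize(const index_t* dims, int ndim) {
      size_t total = 1;
      for (int i = 0; i < ndim; ++i) {
        if (dims[i] < 0) return 0;  // -1 marks an unknown dimension
        total *= static_cast<size_t>(dims[i]);
      }
      return total;
    }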
38 changes: 19 additions & 19 deletions include/mxnet/tuple.h
@@ -177,7 +177,7 @@ class Tuple {
     return ndim_ <= kStackCache ? (data_stack_ + ndim_): (data_heap_ + ndim_);
   }
   /*! \return number of dimension of the tuple */
-  inline uint32_t ndim() const {
+  inline int ndim() const {
     return ndim_;
   }
   /*!
@@ -316,17 +316,17 @@ class Tuple {
 
  protected:
   // stack cache size
-  static const uint32_t kStackCache = 4;
+  static const int kStackCache = 4;
   /*! \brief number of dimension of the tuple */
-  uint32_t ndim_{0};
+  int ndim_{0};
   /*! \brief number of cells allocated in data_heap_ */
-  uint32_t num_heap_allocated_{0};
+  int num_heap_allocated_{0};
   /*! \brief in stack space used to store shape when it is small */
   ValueType data_stack_[kStackCache];
   /*! \brief space to store shape when dimension is big*/
   ValueType* data_heap_{nullptr};
   // internal function to change the dimension
-  inline void SetDim(uint32_t ndim) {
+  inline void SetDim(int ndim) {
     if (ndim > kStackCache &&
         ndim > num_heap_allocated_) {
       delete [] data_heap_;
@@ -348,7 +348,7 @@ class TShape : public Tuple<dim_t> {
    * constructor to construct a shape with all 1.
    * \param ndim the number of dimension
    */
-  inline TShape(uint32_t ndim) {  // NOLINT(*)
+  inline TShape(int ndim) {  // NOLINT(*)
     this->SetDim(ndim);
     std::fill_n(begin(), ndim, 1);
   }
@@ -460,7 +460,7 @@ class TShape : public Tuple<dim_t>
    */
   template<int dim>
   inline mshadow::Shape<dim> get() const {
-    CHECK_EQ(dim, static_cast<int>(ndim()))
+    CHECK_EQ(dim, ndim())
         << "dimension do not match target dimension " << dim << " vs " << ndim();
     const dim_t *d = this->data();
     mshadow::Shape<dim> s;
@@ -479,7 +479,7 @@
     const dim_t *d = this->data();
     s.shape_[1] = d[ndim() - 1];
     dim_t ymax = 1;
-    for (size_t i = 1; i < ndim(); ++i) {
+    for (int i = 1; i < ndim(); ++i) {
       ymax *= d[i - 1];
     }
     s.shape_[0] = ymax;
@@ -491,7 +491,7 @@
    * \param axis_end The ending axis specified.
    * \return the flat 3d shape
    */
-  inline mshadow::Shape<3> FlatTo3D(size_t axis_begin, size_t axis_end) const {
+  inline mshadow::Shape<3> FlatTo3D(int axis_begin, int axis_end) const {
     CHECK(axis_end >= axis_begin);
     mshadow::Shape<3> s;
     if (ndim() == 0) return mshadow::Shape3(0, 0, 0);
@@ -500,13 +500,13 @@
     s.shape_[1] = 1;
     s.shape_[2] = 1;
 
-    for (size_t i = 0; i < axis_begin; ++i) {
+    for (int i = 0; i < axis_begin; ++i) {
       s.shape_[0] *= d[i];
     }
-    for (size_t i = axis_begin; i <= axis_end; ++i) {
+    for (int i = axis_begin; i <= axis_end; ++i) {
       s.shape_[1] *= d[i];
     }
-    for (size_t i = axis_end + 1; i < ndim(); ++i) {
+    for (int i = axis_end + 1; i < ndim(); ++i) {
       s.shape_[2] *= d[i];
     }
     return s;
@@ -516,7 +516,7 @@
    * \param axis The axis specified.
    * \return the flat 3d shape
    */
-  inline mshadow::Shape<3> FlatTo3D(size_t axis) const {
+  inline mshadow::Shape<3> FlatTo3D(int axis) const {
     return FlatTo3D(axis, axis);
   }
   inline bool operator==(const TShape &s) const {
@@ -611,9 +611,9 @@ template<typename T>
 struct hash<mxnet::Tuple<T> > {
   /*! \brief hash a Tuple into unsigned int */
   size_t operator()(const mxnet::Tuple<T>& val) const {
-    std::hash<uint32_t> hash_uint;
-    size_t res = hash_uint(val.ndim());
-    for (uint32_t i = 0; i < val.ndim(); ++i) {
+    std::hash<int> hash_int;
+    size_t res = hash_int(val.ndim());
+    for (int i = 0; i < val.ndim(); ++i) {
       res = dmlc::HashCombine(res, val[i]);
     }
     return res;
@@ -625,9 +625,9 @@ template<>
 struct hash<mxnet::TShape> {
   /*! \brief hash a TShape into unsigned int */
   size_t operator()(const mxnet::TShape& val) const {
-    std::hash<uint32_t> hash_uint;
-    size_t res = hash_uint(val.ndim());
-    for (uint32_t i = 0; i < val.ndim(); ++i) {
+    std::hash<int> hash_int;
+    size_t res = hash_int(val.ndim());
+    for (int i = 0; i < val.ndim(); ++i) {
      res = dmlc::HashCombine(res, val[i]);
    }
    return res;
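Switching ndim() and the loop indices from uint32_t/size_t to int is what makes a signed sentinel workable: comparisons against ndim() stay signed, so a value like -1 cannot wrap around to a huge unsigned bound. A small illustration of the failure mode the change avoids:

    // Why signed loop bounds: with unsigned types a -1 sentinel wraps.
    #include <cstddef>
    #include <cstdio>

    int main() {
      int ndim = -1;                               // unknown-shape sentinel
      for (int i = 0; i < ndim; ++i) {}            // body never runs: correct
      size_t wrapped = static_cast<size_t>(ndim);  // 2^64 - 1 on LP64
      std::printf("as an unsigned bound: %zu\n", wrapped);
      return 0;
    }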
6 changes: 6 additions & 0 deletions make/config.mk
@@ -214,6 +214,12 @@ EXTRA_OPERATORS =
 # Create C++ interface package
 USE_CPP_PACKAGE = 0
 
+# Use int64_t type to represent the total number of elements in a tensor
+# This will cause performance degradation reported in issue #14496
+# Set to 1 for large tensors with size greater than INT32_MAX, i.e. 2147483647
+# Note: the size of each dimension is still bounded by INT32_MAX
+USE_INT64_TENSOR_SIZE = 0
+
 #----------------------------
 # plugins
 #----------------------------
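The INT32_MAX bound in the comment is easy to hit in practice; a quick illustration of the arithmetic the flag exists for:

    // The overflow the flag addresses: a 50000 x 50000 tensor holds
    // 2,500,000,000 elements, which does not fit in a 32-bit int.
    #include <cstdint>
    #include <cstdio>

    int main() {
      int64_t elems = 50000LL * 50000LL;
      std::printf("%lld elements vs INT32_MAX = %d\n",
                  static_cast<long long>(elems), INT32_MAX);
      return 0;
    }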
6 changes: 6 additions & 0 deletions make/crosscompile.jetson.mk
@@ -191,6 +191,12 @@ EXTRA_OPERATORS =
 # Create C++ interface package
 USE_CPP_PACKAGE = 0
 
+# Use int64_t type to represent the total number of elements in the tensor
+# This will cause performance degradation reported in issue #14496
+# Set to 1 for large tensors with size greater than INT32_MAX, i.e. 2147483647
+# Note: the size of each dimension is still bounded by INT32_MAX
+USE_INT64_TENSOR_SIZE = 0
+
 #----------------------------
 # plugins
 #----------------------------
6 changes: 6 additions & 0 deletions make/osx.mk
@@ -135,6 +135,12 @@ EXTRA_OPERATORS =
 # Create C++ interface package
 USE_CPP_PACKAGE = 0
 
+# Use int64_t type to represent the total number of elements in a tensor
+# This will cause performance degradation reported in issue #14496
+# Set to 1 for large tensors with size greater than INT32_MAX, i.e. 2147483647
+# Note: the size of each dimension is still bounded by INT32_MAX
+USE_INT64_TENSOR_SIZE = 0
+
 #----------------------------
 # plugins
 #----------------------------
2 changes: 1 addition & 1 deletion src/operator/convolution_v1-inl.h
@@ -336,7 +336,7 @@ class ConvolutionV1Op : public Operator {
     // param_.workspace is in elements of sizeof(DType)
     // if param_.workspace is set to zero the nstep_ equals ishape[0] (batch)
     nstep_ = std::max<index_t>(
-        std::min(static_cast<index_t>(param_.workspace) /
+        std::min<index_t>(param_.workspace /
                  (shape_colunit_.Size() + shape_dstunit_.Size()), ishape[0]),
         1);
 
2 changes: 1 addition & 1 deletion src/operator/nn/deconvolution-inl.h
@@ -460,7 +460,7 @@ class DeconvolutionOp {
                                oshape[2] * oshape[3]);
     // See convolution for workspace calculations. nstep_ will be the effective batch size
     nstep_ = std::max<index_t>(
-        std::min(static_cast<index_t>(param_.workspace) /
+        std::min<index_t>(param_.workspace /
                  (shape_colunit_.Size() + shape_dstunit_.Size()), ishape[0]),
         1);
 
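Both call sites switch from casting one operand to naming the template argument. std::min deduces a single type from its two arguments, so mixed-width operands either fail to deduce or require a cast on one side; std::min<index_t>(...) converts both operands at the call, which is presumably what silenced the warnings this PR fixes. A reduced example of the pattern (the widened index_t is an assumption here):

    // Naming std::min's template argument sidesteps mixed-type deduction.
    #include <algorithm>
    #include <cstdint>

    typedef int64_t index_t;  // assumption: the large-tensor build

    int main() {
      size_t workspace = 1 << 20;  // scratch budget, as an example
      index_t batch = 8;
      // std::min(workspace, batch);            // ill-formed: T is ambiguous
      index_t nstep = std::max<index_t>(
          std::min<index_t>(workspace / 4096, batch), 1);
      return static_cast<int>(nstep);
    }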
4 changes: 2 additions & 2 deletions src/operator/operator_common.h
@@ -165,7 +165,7 @@ inline bool shape_assign(mxnet::TShape *y, const mxnet::TShape& x) {
   } else if (y->ndim() != x.ndim()) {
     return x.ndim() == 0;
   } else {
-    for (size_t i = 0; i < y->ndim(); ++i) {
+    for (int i = 0; i < y->ndim(); ++i) {
       if ((*y)[i] == 0) {
         (*y)[i] = x[i];
       } else if ((*y)[i] != x[i] && x[i] != 0) {
@@ -563,7 +563,7 @@ class OpSignature {
   }
 
   void AddSign(const mxnet::TShape &shape) {
-    for (size_t i = 0; i < shape.ndim(); i++) {
+    for (int i = 0; i < shape.ndim(); i++) {
       hash = hash * 2 + shape[i];
       eles.push_back(shape[i]);
     }
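For reference, the merge rule those shape_assign loops implement: a dimension of 0 in the target is treated as unknown and adopts the incoming value, while known dimensions must agree. A condensed, self-contained restatement:

    // Condensed restatement of shape_assign's per-dimension merge rule.
    #include <cstdio>

    int main() {
      int y[3] = {0, 32, 224};   // target shape, first dim unknown (0)
      int x[3] = {16, 32, 224};  // incoming shape
      bool ok = true;
      for (int i = 0; i < 3; ++i) {
        if (y[i] == 0) {
          y[i] = x[i];                          // fill in the unknown dim
        } else if (y[i] != x[i] && x[i] != 0) {
          ok = false;                           // genuine mismatch
        }
      }
      std::printf("ok=%d shape=(%d, %d, %d)\n", ok, y[0], y[1], y[2]);
      return 0;
    }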