From 3a026e7d02c0917b8c5a48c38ae86c77238b3cc9 Mon Sep 17 00:00:00 2001 From: ChaiBapchya Date: Tue, 10 Sep 2019 17:56:59 -0700 Subject: [PATCH 01/10] seq last fix --- src/operator/mxnet_op.h | 2 +- src/operator/sequence_last-inl.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/operator/mxnet_op.h b/src/operator/mxnet_op.h index ae98952bb7bf..d59144cd7368 100644 --- a/src/operator/mxnet_op.h +++ b/src/operator/mxnet_op.h @@ -499,7 +499,7 @@ MSHADOW_XINLINE Shape unravel(const index_t idx, const Shape& shape) Shape ret; #pragma unroll for (index_t i = ndim-1, j = idx; i >=0; --i) { - auto tmp = j / shape[i]; + index_t tmp = j / shape[i]; ret[i] = j - tmp*shape[i]; j = tmp; } diff --git a/src/operator/sequence_last-inl.h b/src/operator/sequence_last-inl.h index 3c3c8b0cd49e..b77782e711a6 100644 --- a/src/operator/sequence_last-inl.h +++ b/src/operator/sequence_last-inl.h @@ -101,8 +101,8 @@ class SequenceLastOp : public Operator { using namespace mshadow::expr; int axis = param_.axis; - int out_size = out.size(0) * out.size(1); - int max_seq_len = data.size(axis); + index_t out_size = out.size(0) * out.size(1); + index_t max_seq_len = data.size(axis); index_t offset1 = axis ? out.size(1) : out_size; index_t offset2 = axis ? (max_seq_len * out.size(1)) : out.size(1); From 0bdf05169d685b8d751f8517c93566ef6745cb63 Mon Sep 17 00:00:00 2001 From: ChaiBapchya Date: Wed, 11 Sep 2019 22:55:05 +0000 Subject: [PATCH 02/10] index tensor to have int64 --- src/operator/sequence_last.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/operator/sequence_last.cc b/src/operator/sequence_last.cc index 44869c518504..b53c9a934fd6 100644 --- a/src/operator/sequence_last.cc +++ b/src/operator/sequence_last.cc @@ -31,9 +31,9 @@ template <> Operator *CreateOp(SequenceLastParam param, int dtype, int itype) { Operator *op = nullptr; MSHADOW_TYPE_SWITCH(dtype, DType, { - MSHADOW_TYPE_SWITCH(itype, IType, { - op = new SequenceLastOp(param); - }); +// MSHADOW_TYPE_SWITCH(itype, IType, { + op = new SequenceLastOp(param); + // }); }); return op; } From 1411a42b14d5a84bc3b73820a7c9496c85699ee5 Mon Sep 17 00:00:00 2001 From: ChaiBapchya Date: Thu, 12 Sep 2019 10:36:45 -0700 Subject: [PATCH 03/10] fix dtypes --- src/operator/sequence_last-inl.h | 8 ++++---- src/operator/sequence_last.cc | 6 +++--- tests/nightly/test_large_vector.py | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/operator/sequence_last-inl.h b/src/operator/sequence_last-inl.h index b77782e711a6..78ade5e9de06 100644 --- a/src/operator/sequence_last-inl.h +++ b/src/operator/sequence_last-inl.h @@ -121,11 +121,11 @@ class SequenceLastOp : public Operator { using namespace mshadow::expr; auto axis = param_.axis; - int batch = out_grad.size(0); - int rest = out_grad.size(1); - int out_size = batch * rest; + index_t batch = out_grad.size(0); + index_t rest = out_grad.size(1); + index_t out_size = batch * rest; - int max_seq_len = in_grad.size(axis); + index_t max_seq_len = in_grad.size(axis); index_t offset1 = axis ? rest : out_size; index_t offset2 = axis ? (max_seq_len * rest) : rest; diff --git a/src/operator/sequence_last.cc b/src/operator/sequence_last.cc index b53c9a934fd6..69886d5b891f 100644 --- a/src/operator/sequence_last.cc +++ b/src/operator/sequence_last.cc @@ -31,9 +31,9 @@ template <> Operator *CreateOp(SequenceLastParam param, int dtype, int itype) { Operator *op = nullptr; MSHADOW_TYPE_SWITCH(dtype, DType, { -// MSHADOW_TYPE_SWITCH(itype, IType, { - op = new SequenceLastOp(param); - // }); + MSHADOW_TYPE_SWITCH(itype, IType, { + op = new SequenceLastOp(param); + }); }); return op; } diff --git a/tests/nightly/test_large_vector.py b/tests/nightly/test_large_vector.py index b59a4462d922..65dd71153c5a 100644 --- a/tests/nightly/test_large_vector.py +++ b/tests/nightly/test_large_vector.py @@ -346,7 +346,7 @@ def test_sequence_reverse(): def test_sequence_last(): - a = nd.arange(0, LARGE_X * 2).reshape(LARGE_X, 2) + a = nd.arange(0, LARGE_X * 2, dtype="int64").reshape(LARGE_X, 2) # test if returns last sequence b = nd.SequenceLast(a) @@ -356,7 +356,7 @@ def test_sequence_last(): # test with sequence length # parameter sequence_length - NDArray with shape (batch_size) # (2,3) indicates 2nd sequence from batch 1 and 3rd sequence from batch 2 - b = nd.SequenceLast(a, sequence_length=mx.nd.array([2, 3]), + b = nd.SequenceLast(a, sequence_length=mx.nd.array([2, 3], dtype="int64"), use_sequence_length=True) # check if it takes 2nd sequence from the first batch assert b[0] == a[1][0] From dfcd59930836b9837af591cdaa31b3124b894698 Mon Sep 17 00:00:00 2001 From: ChaiBapchya Date: Thu, 12 Sep 2019 10:52:36 -0700 Subject: [PATCH 04/10] revert unnecessary changes --- src/operator/mxnet_op.h | 2 +- src/operator/sequence_last.cc | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/operator/mxnet_op.h b/src/operator/mxnet_op.h index d59144cd7368..ae98952bb7bf 100644 --- a/src/operator/mxnet_op.h +++ b/src/operator/mxnet_op.h @@ -499,7 +499,7 @@ MSHADOW_XINLINE Shape unravel(const index_t idx, const Shape& shape) Shape ret; #pragma unroll for (index_t i = ndim-1, j = idx; i >=0; --i) { - index_t tmp = j / shape[i]; + auto tmp = j / shape[i]; ret[i] = j - tmp*shape[i]; j = tmp; } diff --git a/src/operator/sequence_last.cc b/src/operator/sequence_last.cc index 69886d5b891f..44869c518504 100644 --- a/src/operator/sequence_last.cc +++ b/src/operator/sequence_last.cc @@ -31,9 +31,9 @@ template <> Operator *CreateOp(SequenceLastParam param, int dtype, int itype) { Operator *op = nullptr; MSHADOW_TYPE_SWITCH(dtype, DType, { - MSHADOW_TYPE_SWITCH(itype, IType, { + MSHADOW_TYPE_SWITCH(itype, IType, { op = new SequenceLastOp(param); - }); + }); }); return op; } From 9123c956338aa09d0c8b2bd764775b02a909d096 Mon Sep 17 00:00:00 2001 From: ChaiBapchya Date: Thu, 12 Sep 2019 11:35:28 -0700 Subject: [PATCH 05/10] if seq len not passed, pass int64 dtype --- src/operator/sequence_last.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/operator/sequence_last.cc b/src/operator/sequence_last.cc index 44869c518504..5206a792e11c 100644 --- a/src/operator/sequence_last.cc +++ b/src/operator/sequence_last.cc @@ -46,8 +46,9 @@ Operator *SequenceLastProp::CreateOperatorEx(Context ctx, DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0], (*in_type)[1]); } - // sequence_length not passed in, so fall back to using input array dtype for second argument - DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0], (*in_type)[0]); + // sequence_length not passed in, so fall back to using int64 dtype for second argument + // second argument is the dtype of the sequence_length NDArray + DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0], 6); } DMLC_REGISTER_PARAMETER(SequenceLastParam); From e639d4307f3bf4e535382924c55dc04d67f9f474 Mon Sep 17 00:00:00 2001 From: ChaiBapchya Date: Thu, 12 Sep 2019 11:36:58 -0700 Subject: [PATCH 06/10] dtype comment --- tests/nightly/test_large_vector.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/nightly/test_large_vector.py b/tests/nightly/test_large_vector.py index 65dd71153c5a..903b3a5cc005 100644 --- a/tests/nightly/test_large_vector.py +++ b/tests/nightly/test_large_vector.py @@ -346,7 +346,7 @@ def test_sequence_reverse(): def test_sequence_last(): - a = nd.arange(0, LARGE_X * 2, dtype="int64").reshape(LARGE_X, 2) + a = nd.arange(0, LARGE_X * 2).reshape(LARGE_X, 2) # test if returns last sequence b = nd.SequenceLast(a) @@ -356,6 +356,8 @@ def test_sequence_last(): # test with sequence length # parameter sequence_length - NDArray with shape (batch_size) # (2,3) indicates 2nd sequence from batch 1 and 3rd sequence from batch 2 + # need to mention dtype = int64 for sequence_length ndarray to support large indices + # else it defaults to float32 and errors b = nd.SequenceLast(a, sequence_length=mx.nd.array([2, 3], dtype="int64"), use_sequence_length=True) # check if it takes 2nd sequence from the first batch From f53d4dcd47dec0507867b5c675ef3a79e7d49bf0 Mon Sep 17 00:00:00 2001 From: ChaiBapchya Date: Thu, 12 Sep 2019 12:05:39 -0700 Subject: [PATCH 07/10] use int32 or int64 as index dtype based on build flag --- src/operator/sequence_last.cc | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/operator/sequence_last.cc b/src/operator/sequence_last.cc index 5206a792e11c..6c6154adf94f 100644 --- a/src/operator/sequence_last.cc +++ b/src/operator/sequence_last.cc @@ -46,9 +46,15 @@ Operator *SequenceLastProp::CreateOperatorEx(Context ctx, DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0], (*in_type)[1]); } - // sequence_length not passed in, so fall back to using int64 dtype for second argument + // sequence_length not passed in, so fall back to using int32/int64 dtype for second argument // second argument is the dtype of the sequence_length NDArray - DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0], 6); + // use int32 or int64 as index dtype based on build flag + #if MXNET_USE_INT64_TENSOR_SIZE == 1 + DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0], mshadow::kInt64); + #else + DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0], mshadow::kInt32); + #endif + } DMLC_REGISTER_PARAMETER(SequenceLastParam); From 0d921f772091aa52f3ce185083c9c2d7b2e590ff Mon Sep 17 00:00:00 2001 From: ChaiBapchya Date: Fri, 13 Sep 2019 10:02:19 -0700 Subject: [PATCH 08/10] Trigger notification From 54061f2081a6ea8f4b801d3971bd8ad599735d59 Mon Sep 17 00:00:00 2001 From: ChaiBapchya Date: Sat, 14 Sep 2019 19:01:44 -0700 Subject: [PATCH 09/10] Trigger notification From d180560101640b42b2c2a4fb81be7463859c2d05 Mon Sep 17 00:00:00 2001 From: ChaiBapchya Date: Mon, 16 Sep 2019 09:55:31 -0700 Subject: [PATCH 10/10] lint fix --- src/operator/sequence_last.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/operator/sequence_last.cc b/src/operator/sequence_last.cc index 6c6154adf94f..3a6cdbad6149 100644 --- a/src/operator/sequence_last.cc +++ b/src/operator/sequence_last.cc @@ -54,7 +54,6 @@ Operator *SequenceLastProp::CreateOperatorEx(Context ctx, #else DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0], mshadow::kInt32); #endif - } DMLC_REGISTER_PARAMETER(SequenceLastParam);