diff --git a/3rdparty/mshadow/mshadow/dot_engine-inl.h b/3rdparty/mshadow/mshadow/dot_engine-inl.h
index 225821e13f5a..93273154b429 100644
--- a/3rdparty/mshadow/mshadow/dot_engine-inl.h
+++ b/3rdparty/mshadow/mshadow/dot_engine-inl.h
@@ -299,17 +299,17 @@ struct BLASEngine<cpu, float> {
   }
   inline static void gemm(Stream<cpu> *stream,
                           bool transa, bool transb,
-                          int m, int n, int k, float alpha,
-                          const float *A, int lda, const float *B, int ldb,
-                          float beta, float *C, int ldc) {
+                          index_t m, index_t n, index_t k, float alpha,
+                          const float *A, index_t lda, const float *B, index_t ldb,
+                          float beta, float *C, index_t ldc) {
     cblas_sgemm(CblasColMajor, GetT(transa), GetT(transb),
                 m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
   }
   inline static void batched_gemm(Stream<cpu> *stream,
                                   bool transa, bool transb,
-                                  int m, int n, int k, float alpha,
-                                  const float *A, int lda, const float *B, int ldb,
-                                  float beta, float *C, int ldc, int batch_count,
+                                  index_t m, index_t n, index_t k, float alpha,
+                                  const float *A, index_t lda, const float *B, index_t ldb,
+                                  float beta, float *C, index_t ldc, index_t batch_count,
                                   float **workspace) {
 #if (MSHADOW_USE_MKL && INTEL_MKL_VERSION >= 20160000)
   // since same m/n/k is used for all single gemms, so we put all gemms into one group
@@ -408,17 +408,17 @@ struct BLASEngine<cpu, double> {
   }
   inline static void gemm(Stream<cpu> *stream,
                           bool transa, bool transb,
-                          int m, int n, int k, double alpha,
-                          const double *A, int lda, const double *B, int ldb,
-                          double beta, double *C, int ldc) {
+                          index_t m, index_t n, index_t k, double alpha,
+                          const double *A, index_t lda, const double *B, index_t ldb,
+                          double beta, double *C, index_t ldc) {
     cblas_dgemm(CblasColMajor, GetT(transa), GetT(transb),
                 m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
   }
   inline static void batched_gemm(Stream<cpu> *stream,
                                   bool transa, bool transb,
-                                  int m, int n, int k, double alpha,
-                                  const double *A, int lda, const double *B, int ldb,
-                                  double beta, double *C, int ldc, int batch_count,
+                                  index_t m, index_t n, index_t k, double alpha,
+                                  const double *A, index_t lda, const double *B, index_t ldb,
+                                  double beta, double *C, index_t ldc, index_t batch_count,
                                   double **workspace) {
 #if (MSHADOW_USE_MKL && INTEL_MKL_VERSION >= 20160000)
   // since same m/n/k is used for all single gemms, so we put all gemms into one group
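Reviewer note on the widening above (not part of the patch): once an operand holds 2^31 elements, gemm dimensions such as `k` no longer fit in a 32-bit `int`, so the BLAS wrappers must take `index_t`, which mshadow compiles as a 64-bit signed integer when `MSHADOW_INT64_TENSOR_SIZE=1`. A minimal standalone sketch of the failure mode; the local `index_t` alias is an assumption standing in for that configuration, not the real mshadow header:

```cpp
#include <cstdint>
#include <cstdio>

// Stand-in for mshadow's index_t in a 64-bit-index build (assumption for
// this sketch only).
using index_t = int64_t;

int main() {
  // Inner dimension of the (1 x 2^31) * (2^31 x 1) dot product exercised
  // by the nightly test at the end of this diff.
  const index_t k = index_t{1} << 31;  // 2147483648, one past INT32_MAX

  // What a 32-bit BLAS integer would receive: the narrowing conversion
  // wraps to -2^31 on typical two's-complement targets.
  const int k32 = static_cast<int>(k);

  std::printf("index_t k = %lld\n", static_cast<long long>(k));  // 2147483648
  std::printf("int     k = %d\n", k32);                          // -2147483648
  return 0;
}
```

Widening the wrapper signatures only helps if the linked BLAS itself takes 64-bit integers (an ILP64 build); hence the skip marker kept next to the new test below.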
diff --git a/src/operator/numpy/np_tensordot_op-inl.h b/src/operator/numpy/np_tensordot_op-inl.h
index d025f1558535..1e5ba7b45b2b 100644
--- a/src/operator/numpy/np_tensordot_op-inl.h
+++ b/src/operator/numpy/np_tensordot_op-inl.h
@@ -60,10 +60,10 @@ inline void ShiftAxes(Tuple<int>* axes_summed, const int ndim) {
 /**
  * Gets matrix dimensions of a and b after transpose and reshape.
  */
-inline void GetMatrixDimensions(int* ad1,
-                                int* ad2,
-                                int* bd1,
-                                int* bd2,
+inline void GetMatrixDimensions(index_t* ad1,
+                                index_t* ad2,
+                                index_t* bd1,
+                                index_t* bd2,
                                 const mxnet::Tuple<int>& a_axes_remained,
                                 const mxnet::Tuple<int>& a_axes_summed,
                                 const mxnet::Tuple<int>& b_axes_remained,
@@ -157,10 +157,10 @@ void MatrixDot(const OpContext& ctx,
                const TBlob& b,
                const TBlob& out,
                const OpReqType req,
-               const int ad1,
-               const int ad2,
-               const int bd1,
-               const int bd2,
+               const index_t ad1,
+               const index_t ad2,
+               const index_t bd1,
+               const index_t bd2,
                const bool aT = false,
                const bool bT = false) {
   using namespace mshadow;
@@ -266,7 +266,7 @@ void TensordotImpl(const Tuple<int>& a_axes_summed,
   GetReorderedAxes(a_axes_summed, &a_axes_remained, &a_axes, b_axes_summed,
                    &b_axes_remained, &b_axes, a_shape, b_shape);
 
-  int ad1 = 1, ad2 = 1, bd1 = 1, bd2 = 1;
+  index_t ad1 = 1, ad2 = 1, bd1 = 1, bd2 = 1;
   GetMatrixDimensions(&ad1, &ad2, &bd1, &bd2, a_axes_remained, a_axes_summed,
                       b_axes_remained, b_axes_summed, a_shape, b_shape);
 
@@ -435,7 +435,7 @@ void TensordotBackwardImpl(const Tuple<int>& a_axes_summed,
   GetReorderedAxes(a_axes_summed, &a_axes_remained, &a_axes, b_axes_summed,
                    &b_axes_remained, &b_axes, a_shape, b_shape);
 
-  int ad1 = 1, ad2 = 1, bd1 = 1, bd2 = 1;
+  index_t ad1 = 1, ad2 = 1, bd1 = 1, bd2 = 1;
   GetMatrixDimensions(&ad1, &ad2, &bd1, &bd2, a_axes_remained, a_axes_summed,
                       b_axes_remained, b_axes_summed, a_shape, b_shape);
 
@@ -653,7 +653,7 @@ void TensordotIntAxesImpl(const int axes,
   GetReorderedAxes(a_axes_summed, &a_axes_remained, &a_axes, b_axes_summed,
                    &b_axes_remained, &b_axes, a_shape, b_shape);
 
-  int ad1 = 1, ad2 = 1, bd1 = 1, bd2 = 1;
+  index_t ad1 = 1, ad2 = 1, bd1 = 1, bd2 = 1;
   GetMatrixDimensions(&ad1, &ad2, &bd1, &bd2, a_axes_remained, a_axes_summed,
                       b_axes_remained, b_axes_summed, a_shape, b_shape);
   MatrixDot<xpu>(ctx, a, b, out, req, ad1, ad2, bd1, bd2);
@@ -746,7 +746,7 @@ void TensordotIntAxesBackwardImpl(const int axes,
   GetReorderedAxes(a_axes_summed, &a_axes_remained, &a_axes, b_axes_summed,
                    &b_axes_remained, &b_axes, a_shape, b_shape);
 
-  int ad1 = 1, ad2 = 1, bd1 = 1, bd2 = 1;
+  index_t ad1 = 1, ad2 = 1, bd1 = 1, bd2 = 1;
   GetMatrixDimensions(&ad1, &ad2, &bd1, &bd2, a_axes_remained, a_axes_summed,
                       b_axes_remained, b_axes_summed, a_shape, b_shape);
 
diff --git a/tests/nightly/test_np_large_array.py b/tests/nightly/test_np_large_array.py
index 072e80b3a34e..7f131354959a 100644
--- a/tests/nightly/test_np_large_array.py
+++ b/tests/nightly/test_np_large_array.py
@@ -36,6 +36,7 @@
 LARGE_X = 100000000
 SMALL_X = 100
 SMALL_Y = 50
+INT_OVERFLOW = 2**31
 
 
 @use_np
@@ -76,3 +76,15 @@ def test_softmax():
     true_output = np.full((SMALL_Y, LARGE_X), (1 / input_data.shape[axis]))
     output = npx.softmax(input_data, axis=axis)
     assert_almost_equal(output.asnumpy(), true_output, rtol=1e-5, atol=1e-5)
+
+#@pytest.mark.skip(reason="CI hasn't switched to ILP64 OpenBLAS yet")
+@use_np
+def test_dot():
+    A = np.ones((1, INT_OVERFLOW), dtype='float32')
+    B = np.ones((INT_OVERFLOW, 1), dtype='float32')
+    A.attach_grad()
+    with mx.autograd.record():
+        C = np.dot(A, B)
+    assert_almost_equal(C.asnumpy(), [INT_OVERFLOW], rtol=1e-5, atol=1e-5)
+    C.backward()
+    assert A.grad.shape == (1, INT_OVERFLOW)
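The same overflow reasoning drives the tensordot changes: `ad1`/`ad2`/`bd1`/`bd2` are products of axis lengths, and in `test_dot` above one of those products is exactly 2^31. A rough sketch of that accumulation under the widened type; `DimProduct` is a hypothetical helper standing in for the loop inside `GetMatrixDimensions`, not a function from the patch:

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

using index_t = int64_t;  // assumed 64-bit index type, matching the patched headers

// Hypothetical helper mirroring how GetMatrixDimensions multiplies the
// remained/summed axis lengths of an operand into one matrix dimension.
index_t DimProduct(const std::vector<index_t>& axis_lengths) {
  index_t prod = 1;
  for (index_t d : axis_lengths) {
    prod *= d;  // 64-bit accumulation: safe up to ~9.2e18
  }
  return prod;
}

int main() {
  // The (1, 2**31) operand from test_dot: its summed dimension alone is
  // 2147483648, which a 32-bit accumulator could not represent.
  const index_t bd1 = DimProduct({index_t{1} << 31});
  std::printf("bd1 = %lld\n", static_cast<long long>(bd1));  // 2147483648
  return 0;
}
```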