diff --git a/cmake/Cuda.cmake b/cmake/Cuda.cmake index 93cbda213158..76984367dc0a 100644 --- a/cmake/Cuda.cmake +++ b/cmake/Cuda.cmake @@ -189,7 +189,7 @@ function(detect_cuDNN) DOC "Path to cuDNN include directory." ) get_filename_component(__libpath_hist ${CUDA_CUDART_LIBRARY} PATH) - find_library(CUDNN_LIBRARY NAMES libcudnn.so # libcudnn_static.a + find_library(CUDNN_LIBRARY NAMES libcudnn.so cudnn.lib # libcudnn_static.a PATHS ${CUDNN_ROOT} $ENV{CUDNN_ROOT} ${CUDNN_INCLUDE} ${__libpath_hist} DOC "Path to cuDNN library.") diff --git a/guide/basic.cpp b/guide/basic.cpp index 3ea3c776e9c8..6ad74ec3ef06 100644 --- a/guide/basic.cpp +++ b/guide/basic.cpp @@ -12,7 +12,7 @@ int main(void) { float data[20]; // create a 2 x 5 x 2 tensor, from existing space Tensor ts(data, Shape3(2,5,2)); - // take first subscript of the tensor + // take first subscript of the tensor Tensor mat = ts[0]; // Tensor object is only a handle, assignment means they have same data content // we can specify content type of a Tensor, if not specified, it is float bydefault @@ -69,6 +69,16 @@ int main(void) { } printf("\n"); + TensorContainer recover_lhs(Shape2(2, 3)), small_mat(Shape2(2, 3)); + small_mat = -100.0f; + recover_lhs = mat_fill_row_element(small_mat, choosed, index); + for (index_t i = 0; i < recover_lhs.size(0); ++i) { + for (index_t j = 0; j < recover_lhs.size(1); ++j) { + printf("%.2f ", recover_lhs[i][j] - lhs[i][j]); + } + } + printf("\n"); + rhs = one_hot_encode(index, 3); for (index_t i = 0; i < lhs.size(0); ++i) { diff --git a/mshadow/random.h b/mshadow/random.h index 2b28305017c4..f4ab2384cde1 100644 --- a/mshadow/random.h +++ b/mshadow/random.h @@ -317,22 +317,30 @@ class Random { inline void GenGaussian(float *dptr, size_t size, float mu, float sigma) { curandStatus_t status; status = curandGenerateNormal(gen_, dptr, size, mu, sigma); - CHECK_EQ(status, CURAND_STATUS_SUCCESS) << "CURAND Gen Uniform failed"; + CHECK_EQ(status, CURAND_STATUS_SUCCESS) << "CURAND Gen Normal 
float failed." + << " size = " << size + << ",mu = " << mu + << ",sigma = " << sigma; } inline void GenGaussian(double *dptr, size_t size, double mu, double sigma) { curandStatus_t status; status = curandGenerateNormalDouble(gen_, dptr, size, mu, sigma); - CHECK_EQ(status, CURAND_STATUS_SUCCESS) << "CURAND Gen Uniform failed"; + CHECK_EQ(status, CURAND_STATUS_SUCCESS) << "CURAND Gen Normal double failed." + << " size = " << size + << ",mu = " << mu + << ",sigma = " << sigma; } inline void GenUniform(float *dptr, size_t size) { curandStatus_t status; status = curandGenerateUniform(gen_, dptr, size); - CHECK_EQ(status, CURAND_STATUS_SUCCESS) << "CURAND Gen Uniform failed"; + CHECK_EQ(status, CURAND_STATUS_SUCCESS) << "CURAND Gen Uniform float failed." + << " size = " << size; } inline void GenUniform(double *dptr, size_t size) { curandStatus_t status; status = curandGenerateUniformDouble(gen_, dptr, size); - CHECK_EQ(status, CURAND_STATUS_SUCCESS) << "CURAND Gen Uniform failed"; + CHECK_EQ(status, CURAND_STATUS_SUCCESS) << "CURAND Gen Uniform double failed." + << " size = " << size; } /*! \brief random numbeer generator */ curandGenerator_t gen_; @@ -361,7 +369,9 @@ template template inline void Random::SampleGaussian( Tensor *dst, DType mu, DType sigma) { - if (dst->CheckContiguous()) { + // cuRAND can only generate normally distributed samples for an even number of elements, + // so take the direct GenGaussian path only when the shape size is even.
+ if (dst->CheckContiguous() && (dst->shape_.Size() % 2 == 0)) { this->GenGaussian(dst->dptr_, dst->shape_.Size(), mu, sigma); } else { *dst = this->gaussian(dst->shape_, mu, sigma); diff --git a/mshadow/tensor_blob.h b/mshadow/tensor_blob.h index d7fd8798d690..98c83f81f27c 100644 --- a/mshadow/tensor_blob.h +++ b/mshadow/tensor_blob.h @@ -317,7 +317,7 @@ struct TShape { inline std::ostream &operator<<(std::ostream &os, const TShape &shape) { os << '('; for (index_t i = 0; i < shape.ndim(); ++i) { - if (i != 0) os << ", "; + if (i != 0) os << ','; os << shape[i]; } // python style tuple diff --git a/mshadow/tensor_cpu-inl.h b/mshadow/tensor_cpu-inl.h index 4f8b472a9f57..cd6d3ba3ac01 100644 --- a/mshadow/tensor_cpu-inl.h +++ b/mshadow/tensor_cpu-inl.h @@ -35,12 +35,14 @@ inline void DeleteStream(Stream *stream) { template inline std::ostream &operator<<(std::ostream &os, const Shape &shape) { // NOLINT(*) - os << "("; + os << '('; for (int i = 0; i < ndim; ++i) { - if (i != 0) os << ","; + if (i != 0) os << ','; os << shape[i]; } - os << ")"; + // python style tuple + if (ndim == 1) os << ','; + os << ')'; return os; }