Skip to content

Commit

Permalink
Merge pull request apache#68 from tqchen/master
Browse files Browse the repository at this point in the history
Change dot to device API
  • Loading branch information
tqchen committed Oct 27, 2015
2 parents 3b2a872 + 47f0149 commit f2d0e25
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 31 deletions.
3 changes: 2 additions & 1 deletion guide/basic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ int main(void) {
lhs = 1.0;
rhs = 1.0;
ret = implicit_dot(lhs, rhs.T());
printf("vdot=%f\n", VectorDot(lhs[0], rhs[0]));
VectorDot(ret[0].Slice(0, 1), lhs[0], rhs[0]);
printf("vdot=%f\n", ret[0][0]);
int cnt = 0;
for (index_t i = 0; i < ret.size(0); ++i) {
for (index_t j = 0; j < ret.size(1); ++j) {
Expand Down
58 changes: 35 additions & 23 deletions mshadow/dot_engine-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,17 +74,19 @@ struct BLASEngine<cpu> {
const double *Y, int incY, double *A, int lda) {
cblas_dger(CblasColMajor, m, n, alpha, X, incX, Y, incY, A, lda);
}
inline static float dot(Stream<cpu> *stream,
int n,
const float* X, int incX,
const float* Y, int incY) {
return cblas_sdot(n, X, incX, Y, incY);
}
inline static double dot(Stream<cpu> *stream,
int n,
const double* X, int incX,
const double* Y, int incY) {
return cblas_ddot(n, X, incX, Y, incY);
inline static void dot(Stream<cpu> *stream,
int n,
const float* X, int incX,
const float* Y, int incY,
float* ret) {
*ret = cblas_sdot(n, X, incX, Y, incY);
}
inline static void dot(Stream<cpu> *stream,
int n,
const double* X, int incX,
const double* Y, int incY,
double* ret) {
*ret = cblas_ddot(n, X, incX, Y, incY);
}
};
#elif MSHADOW_STAND_ALONE == 1
Expand Down Expand Up @@ -138,13 +140,15 @@ struct BLASEngine<cpu> {
inline static void dot(Stream<cpu> *stream,
int n,
const float* X, int incX,
const float* Y, int incY) {
const float* Y, int incY,
float* ret) {
LOG(FATAL) << "Not implmented!";
}
inline static void dot(Stream<cpu> *stream,
int n,
const double* X, int incX,
const double* Y, int incY) {
const double* Y, int incY,
double* ret) {
LOG(FATAL) << "Not implmented!";
}
};
Expand Down Expand Up @@ -218,24 +222,32 @@ struct BLASEngine<gpu> {
m, n, &alpha, X, incX, Y, incY, A, lda);
CHECK_EQ(err, CUBLAS_STATUS_SUCCESS) << "Cublas: Dger fail";
}
inline static float dot(Stream<gpu> *stream,
inline static void dot(Stream<gpu> *stream,
int n,
const float* X, int incX,
const float* Y, int incY) {
float ret;
const float* Y, int incY,
float *ret) {
cublasSetPointerMode(Stream<gpu>::GetBlasHandle(stream),
CUBLAS_POINTER_MODE_DEVICE);
cublasStatus_t err = cublasSdot(Stream<gpu>::GetBlasHandle(stream),
n, X, incX, Y, incY, &ret);
n, X, incX, Y, incY, ret);
CHECK_EQ(err, CUBLAS_STATUS_SUCCESS) << "Cublas: Dot fail";
cublasSetPointerMode(Stream<gpu>::GetBlasHandle(stream),
CUBLAS_POINTER_MODE_HOST);
return ret;
}
inline static double dot(Stream<gpu> *stream,
int n,
const double* X, int incX,
const double* Y, int incY) {
double ret;
inline static void dot(Stream<gpu> *stream,
int n,
const double* X, int incX,
const double* Y, int incY,
double *ret) {
cublasSetPointerMode(Stream<gpu>::GetBlasHandle(stream),
CUBLAS_POINTER_MODE_DEVICE);
cublasStatus_t err = cublasDdot(Stream<gpu>::GetBlasHandle(stream),
n, X, incX, Y, incY, &ret);
n, X, incX, Y, incY, ret);
CHECK_EQ(err, CUBLAS_STATUS_SUCCESS) << "Cublas: Dot fail";
cublasSetPointerMode(Stream<gpu>::GetBlasHandle(stream),
CUBLAS_POINTER_MODE_HOST);
return ret;
}
};
Expand Down
7 changes: 4 additions & 3 deletions mshadow/tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -764,13 +764,14 @@ inline void MapReduceKeepHighDim(TRValue<R, gpu, 1, DType> *dst,

/*!
* \brief CPU/GPU: 1 dimension vector dot
* \param dst Length 1 vector, used to hold the result.
* \param lhs Left operand vector
* \param rhs right operand vector
* \return dot(lhs, rhs)
*/
template<typename Device, typename DType>
inline DType VectorDot(const Tensor<Device, 1, DType> &lhs,
const Tensor<Device, 1, DType> &rhs);
inline void VectorDot(Tensor<Device, 1, DType> dst,
const Tensor<Device, 1, DType> &lhs,
const Tensor<Device, 1, DType> &rhs);
} // namespace mshadow
// include headers
#include "./stream_gpu-inl.h"
Expand Down
13 changes: 9 additions & 4 deletions mshadow/tensor_cpu-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -334,11 +334,16 @@ inline void Softmax(Tensor<cpu, 3, DType> dst,

// blas related
// blas related
// Computes dot(lhs, rhs) and stores it into dst[0] via the BLAS engine's
// device-API dot. dst must have exactly one element; on GPU, dst.dptr_ is
// a device pointer, which is why the result is written rather than returned.
template<typename Device, typename DType>
inline void VectorDot(Tensor<Device, 1, DType> dst,
                      const Tensor<Device, 1, DType> &lhs,
                      const Tensor<Device, 1, DType> &rhs) {
  CHECK_EQ(lhs.size(0), rhs.size(0))
      << "VectorDot: Shape mismatch";
  CHECK_EQ(dst.size(0), 1)
      << "VectorDot: expect dst to be scalar";
  // Bind the BLAS engine to the operands' stream before issuing the call.
  expr::BLASEngine<Device>::SetStream(lhs.stream_);
  mshadow::expr::BLASEngine<Device>::dot(
      lhs.stream_, lhs.size(0), lhs.dptr_, 1, rhs.dptr_, 1, dst.dptr_);
}
} // namespace mshadow
#endif // MSHADOW_TENSOR_CPU_INL_H_

0 comments on commit f2d0e25

Please sign in to comment.