Merge pull request apache#67 from tqchen/master
Add vector dot
tqchen committed Oct 26, 2015
2 parents 28ffc0a + 7b4d869 commit 3b2a872
Showing 5 changed files with 67 additions and 12 deletions.
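For context, the VectorDot helper introduced by this commit computes the inner product of two 1-D tensors by delegating to the configured BLAS backend (CBLAS/MKL on CPU, cuBLAS on GPU). The following sketch is illustrative only and not part of the diff; it assumes a CPU build with CBLAS or MKL enabled, and the vector length of 8 is arbitrary.

// Illustrative usage sketch (not part of this commit); mirrors guide/basic.cpp.
// Assumes an mshadow CPU build with CBLAS or MKL enabled.
#include <cstdio>
#include "mshadow/tensor.h"
using namespace mshadow;

int main(void) {
  InitTensorEngine<cpu>();
  // Two length-8 vectors filled with ones.
  Tensor<cpu, 1, float> lhs = NewTensor<cpu, float>(Shape1(8), 1.0f);
  Tensor<cpu, 1, float> rhs = NewTensor<cpu, float>(Shape1(8), 1.0f);
  // Both vectors are all ones, so dot(lhs, rhs) equals their length: 8.
  printf("vdot=%f\n", VectorDot(lhs, rhs));
  FreeSpace(&lhs);
  FreeSpace(&rhs);
  ShutdownTensorEngine<cpu>();
  return 0;
}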
1 change: 1 addition & 0 deletions guide/basic.cpp
@@ -41,6 +41,7 @@ int main(void) {
  lhs = 1.0;
  rhs = 1.0;
  ret = implicit_dot(lhs, rhs.T());
  printf("vdot=%f\n", VectorDot(lhs[0], rhs[0]));
  int cnt = 0;
  for (index_t i = 0; i < ret.size(0); ++i) {
    for (index_t j = 0; j < ret.size(1); ++j) {
1 change: 1 addition & 0 deletions mshadow/base.h
@@ -69,6 +69,7 @@
#ifndef MSHADOW_USE_MKL
#define MSHADOW_USE_MKL 1
#endif

/*!
 * \brief use CUDA support, must ensure that the cuda include path is correct,
 * or directly compile using nvcc
50 changes: 47 additions & 3 deletions mshadow/dot_engine-inl.h
@@ -22,7 +22,7 @@ struct DotEngine {
// handles the dot
template<typename Device>
struct BLASEngine;
-#if (MSHADOW_USE_CBLAS || MSHADOW_USE_MKL)
+#if (MSHADOW_USE_MKL || MSHADOW_USE_CBLAS)
template<>
struct BLASEngine<cpu> {
  inline static CBLAS_TRANSPOSE GetT(bool t) {
@@ -74,6 +74,18 @@ struct BLASEngine<cpu> {
                         const double *Y, int incY, double *A, int lda) {
    cblas_dger(CblasColMajor, m, n, alpha, X, incX, Y, incY, A, lda);
  }
  inline static float dot(Stream<cpu> *stream,
                          int n,
                          const float* X, int incX,
                          const float* Y, int incY) {
    return cblas_sdot(n, X, incX, Y, incY);
  }
  inline static double dot(Stream<cpu> *stream,
                           int n,
                           const double* X, int incX,
                           const double* Y, int incY) {
    return cblas_ddot(n, X, incX, Y, incY);
  }
};
#elif MSHADOW_STAND_ALONE == 1
template<>
@@ -123,6 +135,18 @@ struct BLASEngine<cpu> {
                         const double *Y, int incY, double *A, int lda) {
    LOG(FATAL) << "Not implemented!";
  }
  inline static float dot(Stream<cpu> *stream,
                          int n,
                          const float* X, int incX,
                          const float* Y, int incY) {
    LOG(FATAL) << "Not implemented!";
    return 0.0f;
  }
  inline static double dot(Stream<cpu> *stream,
                           int n,
                           const double* X, int incX,
                           const double* Y, int incY) {
    LOG(FATAL) << "Not implemented!";
    return 0.0;
  }
};
#endif // MSHADOW_USE_CBLAS || MSHADOW_USE_MKL || MSHADOW_STAND_ALONE
// CuBLAS redirect code
@@ -183,17 +207,37 @@ struct BLASEngine<gpu> {
                         const float *X, int incX,
                         const float *Y, int incY, float *A, int lda) {
    cublasStatus_t err = cublasSger(Stream<gpu>::GetBlasHandle(stream),
                                    m, n, &alpha, X, incX, Y, incY, A, lda);
    CHECK_EQ(err, CUBLAS_STATUS_SUCCESS) << "Cublas: Sger fail";
  }
  inline static void ger(Stream<gpu> *stream,
                         int m, int n, double alpha,
                         const double *X, int incX,
                         const double *Y, int incY, double *A, int lda) {
    cublasStatus_t err = cublasDger(Stream<gpu>::GetBlasHandle(stream),
                                    m, n, &alpha, X, incX, Y, incY, A, lda);
    CHECK_EQ(err, CUBLAS_STATUS_SUCCESS) << "Cublas: Dger fail";
  }
  inline static float dot(Stream<gpu> *stream,
                          int n,
                          const float* X, int incX,
                          const float* Y, int incY) {
    float ret;
    cublasStatus_t err = cublasSdot(Stream<gpu>::GetBlasHandle(stream),
                                    n, X, incX, Y, incY, &ret);
    CHECK_EQ(err, CUBLAS_STATUS_SUCCESS) << "Cublas: Dot fail";
    return ret;
  }
  inline static double dot(Stream<gpu> *stream,
                           int n,
                           const double* X, int incX,
                           const double* Y, int incY) {
    double ret;
    cublasStatus_t err = cublasDdot(Stream<gpu>::GetBlasHandle(stream),
                                    n, X, incX, Y, incY, &ret);
    CHECK_EQ(err, CUBLAS_STATUS_SUCCESS) << "Cublas: Dot fail";
    return ret;
  }
};
#endif // MSHADOW_USE_CUDA
// helper function to decide which shape we are in
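The dot wrappers added above all follow standard BLAS xDOT semantics. As a reference (illustrative sketch, not part of this commit), the quantity that cblas_sdot/cblas_ddot and cublasSdot/cublasDdot return for positive strides is:

// Reference semantics of BLAS xDOT for positive strides (illustrative only):
// dot(n, X, incX, Y, incY) = sum over i in [0, n) of X[i * incX] * Y[i * incY].
template<typename DType>
inline DType reference_dot(int n, const DType *X, int incX,
                           const DType *Y, int incY) {
  DType sum = DType(0);
  for (int i = 0; i < n; ++i) {
    sum += X[i * incX] * Y[i * incY];
  }
  return sum;
}

VectorDot (defined in tensor_cpu-inl.h below) calls these wrappers with incX = incY = 1 and n = lhs.size(0), so the result is a plain elementwise multiply-accumulate over the two vectors.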
10 changes: 10 additions & 0 deletions mshadow/tensor.h
@@ -761,6 +761,16 @@ template<typename Saver, typename Reducer, int dimkeep,
inline void MapReduceKeepHighDim(TRValue<R, gpu, 1, DType> *dst,
                                 const expr::Exp<E, DType, etype> &exp,
                                 DType scale = 1);

/*!
 * \brief CPU/GPU: 1-dimensional vector dot product
 * \param lhs Left operand vector
 * \param rhs Right operand vector
 * \return dot(lhs, rhs)
 */
template<typename Device, typename DType>
inline DType VectorDot(const Tensor<Device, 1, DType> &lhs,
                       const Tensor<Device, 1, DType> &rhs);
} // namespace mshadow
// include headers
#include "./stream_gpu-inl.h"
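Because the declaration above is templated on Device, code built on VectorDot can stay device-generic. A hypothetical helper (illustrative only; CosineSimilarity is not part of mshadow or this commit) shows the pattern; it assumes the caller has already allocated the tensors and, for gpu, attached their stream.

// Hypothetical device-generic helper built on the new VectorDot API.
#include <cmath>
#include "mshadow/tensor.h"

template<typename Device, typename DType>
inline DType CosineSimilarity(const mshadow::Tensor<Device, 1, DType> &a,
                              const mshadow::Tensor<Device, 1, DType> &b) {
  DType ab = mshadow::VectorDot(a, b);  // <a, b>
  DType aa = mshadow::VectorDot(a, a);  // ||a||^2
  DType bb = mshadow::VectorDot(b, b);  // ||b||^2
  return ab / (std::sqrt(aa) * std::sqrt(bb));
}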
17 changes: 8 additions & 9 deletions mshadow/tensor_cpu-inl.h
@@ -10,6 +10,7 @@
#include "./base.h"
#include "./tensor.h"
#include "./packet-inl.h"
#include "./dot_engine-inl.h"

namespace mshadow {
template<>
@@ -331,15 +332,13 @@ inline void Softmax(Tensor<cpu, 3, DType> dst,
}
}

-template<typename DType>
-inline DType VDot(const Tensor<cpu, 1, DType> &lhs,
-                  const Tensor<cpu, 1, DType> &rhs) {
-  CHECK_EQ(lhs.shape_, rhs.shape_) << "VDot: shape mismatch";
-  DType sum = static_cast<DType>(0);
-  for (index_t x = 0; x < lhs.size(0); ++x) {
-    sum += lhs[x] * rhs[x];
-  }
-  return sum;
+// blas related
+template<typename Device, typename DType>
+inline DType VectorDot(const Tensor<Device, 1, DType> &lhs,
+                       const Tensor<Device, 1, DType> &rhs) {
+  expr::BLASEngine<Device>::SetStream(lhs.stream_);
+  return mshadow::expr::BLASEngine<Device>::dot(
+    lhs.stream_, lhs.size(0), lhs.dptr_, 1, rhs.dptr_, 1);
}
} // namespace mshadow
#endif // MSHADOW_TENSOR_CPU_INL_H_
