Skip to content

Commit

Permalink
Merge pull request #436 from lamblin/help_debug
Browse files: browse the repository at this point in the history
Use "right" ld* with 1D matrices
  • Loading branch information
nouiz authored May 17, 2017
2 parents: 65ed476 + f82d5be; commit 5db51f9
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 13 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ before_install:
- export PREFIX=$HOME/.local
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update && brew install doxygen; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export PYTHONUSERBASE=$PREFIX; fi
- pip install --user breathe sphinx sphinx_rtd_theme cython numpy 'mako>=0.7' six
- pip install --user breathe sphinx==1.5.1 sphinx_rtd_theme cython numpy 'mako>=0.7' six
- export PATH=$PATH:$PREFIX/bin
- export CPATH=$CPATH:$PREFIX/include
- export LIBRARY_PATH=$LIBRARY_PATH:$PREFIX/lib
Expand Down
36 changes: 24 additions & 12 deletions src/gpuarray_array_blas.c
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ int GpuArray_rgemm(cb_transpose transA, cb_transpose transB, double alpha,
else {
err = GpuArray_copy(&copyA, A, GA_F_ORDER);
if (err != GA_NO_ERROR)
goto cleanup;
goto cleanup;
Ap = &copyA;
}
}
Expand All @@ -264,7 +264,7 @@ int GpuArray_rgemm(cb_transpose transA, cb_transpose transB, double alpha,
else {
err = GpuArray_copy(&copyB, B, GA_F_ORDER);
if (err != GA_NO_ERROR)
goto cleanup;
goto cleanup;
Bp = &copyB;
}
}
Expand Down Expand Up @@ -388,7 +388,7 @@ int GpuArray_rger(double alpha, GpuArray *X, GpuArray *Y, GpuArray *A,
else {
err = GpuArray_copy(&copyX, X, GA_ANY_ORDER);
if (err != GA_NO_ERROR)
goto cleanup;
goto cleanup;
Xp = &copyX;
}
}
Expand All @@ -398,7 +398,7 @@ int GpuArray_rger(double alpha, GpuArray *X, GpuArray *Y, GpuArray *A,
else {
err = GpuArray_copy(&copyY, Y, GA_ANY_ORDER);
if (err != GA_NO_ERROR)
goto cleanup;
goto cleanup;
Yp = &copyY;
}
}
Expand Down Expand Up @@ -526,7 +526,7 @@ int GpuArray_rgemmBatch_3d(cb_transpose transA, cb_transpose transB, double alph
err = GpuArray_copy(&copyA, A, GA_C_ORDER);
cA = 1;
if (err != GA_NO_ERROR)
goto cleanup;
goto cleanup;
Ap = &copyA;
}
}
Expand All @@ -538,7 +538,7 @@ int GpuArray_rgemmBatch_3d(cb_transpose transA, cb_transpose transB, double alph
err = GpuArray_copy(&copyB, B, GA_C_ORDER);
cB = 1;
if (err != GA_NO_ERROR)
goto cleanup;
goto cleanup;
Bp = &copyB;
}
}
Expand All @@ -550,24 +550,32 @@ int GpuArray_rgemmBatch_3d(cb_transpose transA, cb_transpose transB, double alph

if (cC == 2) {
o = cb_fortran;
ldc = Cp->strides[2] / elsize;
ldc = Cp->dimensions[2] > 1
? Cp->strides[2] / elsize
: Cp->dimensions[1];
} else if (cC == 1) {
o = cb_c;
ldc = Cp->strides[1] / elsize;
ldc = Cp->dimensions[1] > 1
? Cp->strides[1] / elsize
: Cp->dimensions[2];
} else {
err = GA_VALUE_ERROR;
goto cleanup;
}
if (cA == 2) {
lda = Ap->strides[2] / elsize;
lda = Ap->dimensions[2] > 1
? Ap->strides[2] / elsize
: Ap->dimensions[1];
if (o == cb_c) {
if (transA == cb_no_trans)
transA = cb_trans;
else
transA = cb_no_trans;
}
} else if (cA == 1) {
lda = Ap->strides[1] / elsize;
lda = Ap->dimensions[1] > 1
? Ap->strides[1] / elsize
: Ap->dimensions[2];
if (o == cb_fortran) {
if (transA == cb_no_trans)
transA = cb_trans;
Expand All @@ -579,15 +587,19 @@ int GpuArray_rgemmBatch_3d(cb_transpose transA, cb_transpose transB, double alph
goto cleanup;
}
if (cB == 2) {
ldb = Bp->strides[2] / elsize;
ldb = Bp->dimensions[2] > 1
? Bp->strides[2] / elsize
: Bp->dimensions[1];
if (o == cb_c) {
if (transB == cb_no_trans)
transB = cb_trans;
else
transB = cb_no_trans;
}
} else if (cB == 1) {
ldb = Bp->strides[1] / elsize;
ldb = Bp->dimensions[1] > 1
? Bp->strides[1] / elsize
: Bp->dimensions[2];
if (o == cb_fortran) {
if (transB == cb_no_trans)
transB = cb_trans;
Expand Down

0 comments on commit 5db51f9

Please sign in to comment.