Skip to content

Commit f6a457d

Browse files
luisacicolinilnghrdntcrAlbertoParravicini
authored
Grcuda 85 support cusparse (#25)
* initial commit, folder creation for cusparse * added function to CUSPARSERegistry, missing: nfi functions' input settings * nfi signatures completed, missing: Desc types mgmt * final commit from my machine :( * enum addition and casting * completed enum revision * added first test cusparseCoo * added SpMV test, not working * Removed useless function instantiations in CUSPARSERegistry. For the moment being we are exposing to the user the functions to create various matrix descripts as well as creating/destroying the handle. This needs to be changed * Almost functioning version of testSpMV. The error that i'm getting now is related to the enums, but at least no more polyglot exceptions * initial support for cusparse * cleaned code, more tests added * minor additions to tests * minor fixes to context and options for enabling cusparse * formatting and cleaning complete * minor fixes to context options in cusparse tests * changelog updated * Removed unused imports * Modified libcusparse.so.11 -> libcusparse.so * updated copyright for new files * added suport for async functions, implementation of non-exposed functions * removed useless initialization * added sparseSgemvi * begun implementation of proxiesn [commit della svolta] * added basic functions to proxyspmv, to be testes * added proxy for Sgemvi * Completed functions for SpMV and Sgemvi, context creation missing * tests ready for proxies, not working (context issues) * working on contexts * context creation fails * minor additions to sparse proxy * createCoo now works * proxies all right, non valid handle in buffersize function * IT WORKS * initial steps for tests implementation for Sgemvi and SpMV with CSR format * working tests for coo and csr with spmv, sgemvi does not work (does not update the vector passed as input) * minor fixes to tests * finished testing sgemvi and spmv * finished testing sgemvi and spmv * added test for libraries integration * minor fixes, all good, streams' functioning for libraries interoperability checked with profiler * changelog updated * tests * Added breaks to switch statement * partially working tests for TGemvi and SpMV * Fixed spmv Tests for coo and csr, gemvi still needs to be fixed * TGemvi now works with data types C and S * added streams syncing to tests * added syncing, sometimes (after mx clean) crs/coo do not work, tgemvi does not work with double types, despite syncing * Removed double and double complex from tests * GrCUDAOptions updated for cuSPARSE * fixed context * removed ternary expressions * small cleanups; fixed tracking of array dependencies not working in cusparse * updated changelog Co-authored-by: Francesco Sgherzi <[email protected]> Co-authored-by: Francesco Sgherzi <[email protected]> Co-authored-by: Alberto Parravicini <[email protected]>
1 parent fd93a0c commit f6a457d

19 files changed

+1621
-29
lines changed

CHANGELOG.md

+11
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,13 @@
1+
# 2021-11-21
2+
3+
* Enabled support for cuSPARSE
4+
* Operations with sparse matrices from cuSPARSE library are now supported
5+
* **Known limitation:** Not all data types are supported: in particular Tgemvi does not support double data types
6+
(both complex and not)
7+
* Concurrent operations on parallel streams were analyzed using Nvidia Profiler
8+
9+
=======
10+
111
# 2021-11-17
212

313
* Added the support of precise timing of kernels, for debugging and complex scheduling policies
@@ -22,6 +32,7 @@
2232
* Changed all the print in the source code in log events, with different logging levels
2333
* Added documentation about logging in docs
2434

35+
2536
# 2021-10-13
2637

2738
* Enabled support for cuBLAS and cuML in the async scheduler

grcuda-data

projects/com.nvidia.grcuda.test/src/com/nvidia/grcuda/test/cudalibraries/CUBLASTest.java

+7-6
Original file line numberDiff line numberDiff line change
@@ -40,13 +40,14 @@
4040
import java.util.Collection;
4141
import java.util.function.Function;
4242

43-
import com.nvidia.grcuda.runtime.executioncontext.ExecutionPolicyEnum;
44-
import com.nvidia.grcuda.test.util.GrCUDATestUtil;
43+
import org.graalvm.polyglot.Context;
44+
import org.graalvm.polyglot.Value;
4545
import org.junit.Test;
4646
import org.junit.runner.RunWith;
4747
import org.junit.runners.Parameterized;
48-
import org.graalvm.polyglot.Context;
49-
import org.graalvm.polyglot.Value;
48+
49+
import com.nvidia.grcuda.runtime.executioncontext.ExecutionPolicyEnum;
50+
import com.nvidia.grcuda.test.util.GrCUDATestUtil;
5051

5152
@RunWith(Parameterized.class)
5253
public class CUBLASTest {
@@ -243,8 +244,8 @@ public static void assertOutputVectorIsCorrect(int len, Value deviceArray,
243244
}
244245
}
245246

246-
private void assertOutputVectorIsCorrect(int len, Value deviceArray,
247-
Function<Integer, Integer> outFunc) {
247+
void assertOutputVectorIsCorrect(int len, Value deviceArray,
248+
Function<Integer, Integer> outFunc) {
248249
CUBLASTest.assertOutputVectorIsCorrect(len, deviceArray, outFunc, this.typeChar);
249250
}
250251

projects/com.nvidia.grcuda.test/src/com/nvidia/grcuda/test/cudalibraries/CUSPARSETest.java

+507
Large diffs are not rendered by default.

projects/com.nvidia.grcuda.test/src/com/nvidia/grcuda/test/util/GrCUDATestUtil.java

-2
Original file line numberDiff line numberDiff line change
@@ -127,5 +127,3 @@ private static boolean isOptionRedundantForSync(GrCUDATestOptionsStruct options)
127127
return false;
128128
}
129129
}
130-
131-

projects/com.nvidia.grcuda/src/com/nvidia/grcuda/GrCUDAContext.java

+8-1
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737

3838
import com.nvidia.grcuda.cudalibraries.cublas.CUBLASRegistry;
3939
import com.nvidia.grcuda.cudalibraries.cuml.CUMLRegistry;
40+
import com.nvidia.grcuda.cudalibraries.cusparse.CUSPARSERegistry;
4041
import com.nvidia.grcuda.cudalibraries.tensorrt.TensorRTRegistry;
4142
import com.nvidia.grcuda.functions.BindAllFunction;
4243
import com.nvidia.grcuda.functions.BindFunction;
@@ -151,6 +152,11 @@ public GrCUDAContext(Env env) {
151152
namespace.addNamespace(trt);
152153
new TensorRTRegistry(this).registerTensorRTFunctions(trt);
153154
}
155+
if (grCUDAOptionMap.isCuSPARSEEnabled()) {
156+
Namespace sparse = new Namespace(CUSPARSERegistry.NAMESPACE);
157+
namespace.addNamespace(sparse);
158+
new CUSPARSERegistry(this).registerCUSPARSEFunctions(sparse);
159+
}
154160
this.rootNamespace = namespace;
155161
}
156162

@@ -196,6 +202,7 @@ public ConcurrentHashMap<Class<?>, CallTarget> getMapCallTargets() {
196202
return uncachedMapCallTargets;
197203
}
198204

205+
199206
/**
200207
* Compute the maximum number of concurrent threads that can be spawned by GrCUDA.
201208
* This value is usually smaller or equal than the number of logical CPU threads available on the machine.
@@ -215,4 +222,4 @@ public GrCUDAOptionMap getOptions() {
215222
public void cleanup() {
216223
this.grCUDAExecutionContext.cleanup();
217224
}
218-
}
225+
}

projects/com.nvidia.grcuda/src/com/nvidia/grcuda/GrCUDAOptionMap.java

+8
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,14 @@ public String getCuMLLibrary(){
166166
return (String) getOptionValueFromOptionKey(GrCUDAOptions.CuMLLibrary);
167167
}
168168

169+
public Boolean isCuSPARSEEnabled(){
170+
return (Boolean) getOptionValueFromOptionKey(GrCUDAOptions.CuSPARSEEnabled);
171+
}
172+
173+
public String getCuSPARSELibrary(){
174+
return (String) getOptionValueFromOptionKey(GrCUDAOptions.CuSPARSELibrary);
175+
}
176+
169177
public ExecutionPolicyEnum getExecutionPolicy(){
170178
return (ExecutionPolicyEnum) getOptionValueFromOptionKey(GrCUDAOptions.ExecutionPolicy);
171179
}

projects/com.nvidia.grcuda/src/com/nvidia/grcuda/GrCUDAOptions.java

+10-2
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
import com.nvidia.grcuda.cudalibraries.cublas.CUBLASRegistry;
4343
import com.nvidia.grcuda.cudalibraries.cuml.CUMLRegistry;
4444
import com.nvidia.grcuda.cudalibraries.tensorrt.TensorRTRegistry;
45+
import com.nvidia.grcuda.cudalibraries.cusparse.CUSPARSERegistry;
4546
import com.oracle.truffle.api.Option;
4647

4748
@Option.Group(GrCUDALanguage.ID)
@@ -50,13 +51,13 @@ public final class GrCUDAOptions {
5051
@Option(category = OptionCategory.USER, help = "Enable cuBLAS support.", stability = OptionStability.STABLE) //
5152
public static final OptionKey<Boolean> CuBLASEnabled = new OptionKey<>(true);
5253

53-
@Option(category = OptionCategory.USER, help = "Set the location of the cublas library.", stability = OptionStability.STABLE) //
54+
@Option(category = OptionCategory.USER, help = "Set the location of the cuBLAS library.", stability = OptionStability.STABLE) //
5455
public static final OptionKey<String> CuBLASLibrary = new OptionKey<>(CUBLASRegistry.DEFAULT_LIBRARY);
5556

5657
@Option(category = OptionCategory.USER, help = "Enable cuML support.", stability = OptionStability.STABLE) //
5758
public static final OptionKey<Boolean> CuMLEnabled = new OptionKey<>(true);
5859

59-
@Option(category = OptionCategory.USER, help = "Set the location of the cuml library.", stability = OptionStability.STABLE) //
60+
@Option(category = OptionCategory.USER, help = "Set the location of the cuML library.", stability = OptionStability.STABLE) //
6061
public static final OptionKey<String> CuMLLibrary = new OptionKey<>(CUMLRegistry.DEFAULT_LIBRARY);
6162

6263
@Option(category = OptionCategory.USER, help = "Choose the scheduling policy of GrCUDA computations", stability = OptionStability.EXPERIMENTAL) //
@@ -86,6 +87,13 @@ public final class GrCUDAOptions {
8687
@Option(category = OptionCategory.USER, help = "Set the location of the TensorRT library.", stability = OptionStability.STABLE) //
8788
public static final OptionKey<String> TensorRTLibrary = new OptionKey<>(TensorRTRegistry.DEFAULT_LIBRARY);
8889

90+
@Option(category = OptionCategory.USER, help = "Enable cuSPARSE support.", stability = OptionStability.STABLE) //
91+
public static final OptionKey<Boolean> CuSPARSEEnabled = new OptionKey<>(true);
92+
93+
@Option(category = OptionCategory.USER, help = "Set the location of the cuSPARSE library.", stability = OptionStability.EXPERIMENTAL) //
94+
public static final OptionKey<String> CuSPARSELibrary = new OptionKey<>(CUSPARSERegistry.DEFAULT_LIBRARY);
95+
8996
@Option(category = OptionCategory.USER, help = "Log the kernels execution time.", stability = OptionStability.STABLE) //
9097
public static final OptionKey<Boolean> TimeComputation = new OptionKey<>(GrCUDAOptionMap.DEFAULT_TIME_COMPUTATION);
9198
}
99+

projects/com.nvidia.grcuda/src/com/nvidia/grcuda/cudalibraries/CUDALibraryFunction.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@
4646
*/
4747
public abstract class CUDALibraryFunction extends Function {
4848

49-
private final List<ComputationArgument> computationArguments;
49+
protected final List<ComputationArgument> computationArguments;
5050

5151
/**
5252
* Constructor, it takes the name of the wrapped function and its NFI signature,

projects/com.nvidia.grcuda/src/com/nvidia/grcuda/cudalibraries/cublas/CUBLASRegistry.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ public Object call(Object[] arguments) throws ArityException {
116116
}
117117
};
118118

119-
// create wrapper for cublasDestroy: cublasError_t cublasDestroy(long handle) -> void
119+
// create wrapper for cublasDestroy: cublasError_t cublasDestroy(long handle)
120120
// cublasDestroy(long handle)
121121
cublasDestroyFunction = new Function(CUBLAS_CUBLASDESTROY.getName()) {
122122
@Override
@@ -190,7 +190,6 @@ public void registerCUBLASFunctions(Namespace namespace) {
190190
@TruffleBoundary
191191
protected Object call(Object[] arguments) {
192192
ensureInitialized();
193-
194193
try {
195194
if (nfiFunction == null) {
196195
CompilerDirectives.transferToInterpreterAndInvalidate();
@@ -265,4 +264,5 @@ private static String cublasReturnCodeToString(int returnCode) {
265264
"(sint64, sint32, sint32, sint32, sint32, sint32, pointer, pointer, sint32, pointer, sint32, pointer, pointer, sint32): sint32"));
266265
}
267266
}
267+
268268
}

0 commit comments

Comments
 (0)