diff --git a/docs/execution-providers/CUDA-ExecutionProvider.md b/docs/execution-providers/CUDA-ExecutionProvider.md index 501412369036d..864743c23a322 100644 --- a/docs/execution-providers/CUDA-ExecutionProvider.md +++ b/docs/execution-providers/CUDA-ExecutionProvider.md @@ -84,6 +84,7 @@ Default value: true ### cudnn_conv_use_max_workspace Check [tuning performance for convolution heavy models](../performance/tune-performance.md#convolution-heavy-models-and-the-cuda-ep) for details on what this flag does. +This flag is only supported from the V2 version of the provider options struct when used with the C API. The V2 provider options struct can be created using [this](https://onnxruntime.ai/docs/api/c/struct_ort_api.html#a0d29cbf555aa806c050748cf8d2dc172) and updated using [this](https://onnxruntime.ai/docs/api/c/struct_ort_api.html#a4710fc51f75a4b9a75bde20acbfa0783). Please take a look at the sample below for an example. Default value: 0 @@ -112,6 +113,8 @@ session = ort.InferenceSession(model_path, providers=providers) ### C/C++ +#### Using legacy provider options struct + ```c++ OrtSessionOptions* session_options = /* ... */; @@ -125,3 +128,36 @@ options.do_copy_in_default_stream = 1; SessionOptionsAppendExecutionProvider_CUDA(session_options, &options); ``` +#### Using V2 provider options struct + +```c++ +OrtCUDAProviderOptionsV2* cuda_options = nullptr; +CreateCUDAProviderOptions(&cuda_options); + +std::vector<const char*> keys{"device_id", "gpu_mem_limit", "arena_extend_strategy", "cudnn_conv_algo_search", "do_copy_in_default_stream", "cudnn_conv_use_max_workspace"}; +std::vector<const char*> values{"0", "2147483648", "kSameAsRequested", "DEFAULT", "1", "1"}; + +UpdateCUDAProviderOptions(cuda_options, keys.data(), values.data(), 6); + +OrtSessionOptions* session_options = /* ... 
*/; +SessionOptionsAppendExecutionProvider_CUDA_V2(session_options, cuda_options); + +// Finally, don't forget to release the provider options +ReleaseCUDAProviderOptions(cuda_options); +``` + +```c# +var cudaProviderOptions = new OrtCUDAProviderOptions(); // Dispose this finally + +var providerOptionsDict = new Dictionary<string, string>(); +providerOptionsDict["device_id"] = "0"; +providerOptionsDict["gpu_mem_limit"] = "2147483648"; +providerOptionsDict["arena_extend_strategy"] = "kSameAsRequested"; +providerOptionsDict["cudnn_conv_algo_search"] = "DEFAULT"; +providerOptionsDict["do_copy_in_default_stream"] = "1"; +providerOptionsDict["cudnn_conv_use_max_workspace"] = "1"; + +cudaProviderOptions.UpdateOptions(providerOptionsDict); + +SessionOptions options = SessionOptions.MakeSessionOptionWithCudaProvider(cudaProviderOptions); // Dispose this finally +``` \ No newline at end of file diff --git a/docs/performance/tune-performance.md b/docs/performance/tune-performance.md index 3c3f9795c7bbf..869c6911a5aad 100644 --- a/docs/performance/tune-performance.md +++ b/docs/performance/tune-performance.md @@ -300,11 +300,35 @@ providers = [("CUDAExecutionProvider", {"cudnn_conv_use_max_workspace": '1'})] sess_options = ort.SessionOptions() sess = ort.InferenceSession("my_conv_heavy_fp16_model.onnx", sess_options = sess_options, providers=providers) ``` + * C/C++ -Support for this provider option will be added in upcoming releases. +``` +OrtCUDAProviderOptionsV2* cuda_options = nullptr; +CreateCUDAProviderOptions(&cuda_options); + +std::vector<const char*> keys{"cudnn_conv_use_max_workspace"}; +std::vector<const char*> values{"1"}; + +UpdateCUDAProviderOptions(cuda_options, keys.data(), values.data(), 1); + +OrtSessionOptions* session_options = /* ... */; +SessionOptionsAppendExecutionProvider_CUDA_V2(session_options, cuda_options); + +// Finally, don't forget to release the provider options +ReleaseCUDAProviderOptions(cuda_options); +``` * C# -Support for this provider option will be added in upcoming releases. 
+``` +var cudaProviderOptions = new OrtCUDAProviderOptions(); // Dispose this finally + +var providerOptionsDict = new Dictionary<string, string>(); +providerOptionsDict["cudnn_conv_use_max_workspace"] = "1"; + +cudaProviderOptions.UpdateOptions(providerOptionsDict); + +SessionOptions options = SessionOptions.MakeSessionOptionWithCudaProvider(cudaProviderOptions); // Dispose this finally +``` ## Troubleshooting performance issues