diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs b/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs
index af59d13df8114..78f04654a93f5 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs
@@ -145,9 +145,9 @@ public IDisposableReadOnlyCollection<DisposableNamedOnnxValue> Run(IReadOnlyColl
         {
             using (var cleanupList = new DisposableList<IDisposable>())
             {
-                var inputNamesArray = ConvertNamesToUtf8(inputs, v => v.Name, cleanupList);
+                var inputNamesArray = NativeOnnxValueHelper.ConvertNamesToUtf8(inputs, v => v.Name, cleanupList);
                 var inputValuesArray = GetOrtValuesHandles(inputs, cleanupList);
-                var outputNamesArray = ConvertNamesToUtf8(outputNames, n => n, cleanupList);
+                var outputNamesArray = NativeOnnxValueHelper.ConvertNamesToUtf8(outputNames, n => n, cleanupList);

                 var ortValues = RunImpl(options, inputNamesArray, inputValuesArray, outputNamesArray, cleanupList);
                 return CreateDisposableResult(ortValues, outputNames);
@@ -205,9 +205,9 @@ public IDisposableReadOnlyCollection<DisposableNamedOnnxValue> Run(
             using (var cleanupList = new DisposableList<IDisposable>())
             {
-                var inputNamesArray = ConvertNamesToUtf8(inputNames, n => n, cleanupList);
+                var inputNamesArray = NativeOnnxValueHelper.ConvertNamesToUtf8(inputNames, n => n, cleanupList);
                 IntPtr[] inputValuesArray = GetOrtValuesHandles(inputValues, true);
-                var outputNamesArray = ConvertNamesToUtf8(outputNames, n => n, cleanupList);
+                var outputNamesArray = NativeOnnxValueHelper.ConvertNamesToUtf8(outputNames, n => n, cleanupList);

                 var ortValues = RunImpl(options, inputNamesArray, inputValuesArray, outputNamesArray, cleanupList);
@@ -262,11 +262,11 @@ public void Run(
             using (var cleanupList = new DisposableList<IDisposable>())
             {
                 // prepare inputs
-                var inputNamesArray = ConvertNamesToUtf8(inputNames, n => n, cleanupList);
+                var inputNamesArray = NativeOnnxValueHelper.ConvertNamesToUtf8(inputNames, n => n, cleanupList);
                 IntPtr[] inputValuesArray = GetOrtValuesHandles(inputValues, true);

                 // prepare outputs
-                var outputNamesArray = ConvertNamesToUtf8(outputNames, n => n, cleanupList);
+                var outputNamesArray = NativeOnnxValueHelper.ConvertNamesToUtf8(outputNames, n => n, cleanupList);
                 IntPtr[] outputValuesArray = GetOrtValuesHandles(outputValues, false);

                 NativeApiStatus.VerifySuccess(NativeMethods.OrtRun(
@@ -310,12 +310,12 @@ public void Run(
             IReadOnlyCollection<NamedOnnxValue> outputs,
             RunOptions options)
         {
-            using(var cleanupList = new DisposableList<IDisposable>())
+            using (var cleanupList = new DisposableList<IDisposable>())
             {
-                var inputNamesArray = ConvertNamesToUtf8(inputs, i => i.Name, cleanupList);
+                var inputNamesArray = NativeOnnxValueHelper.ConvertNamesToUtf8(inputs, i => i.Name, cleanupList);
                 var inputValuesArray = GetOrtValuesHandles(inputs, cleanupList);

-                var outputNamesArray = ConvertNamesToUtf8(outputs, o => o.Name, cleanupList);
+                var outputNamesArray = NativeOnnxValueHelper.ConvertNamesToUtf8(outputs, o => o.Name, cleanupList);
                 var outputValuesArray = GetOrtValuesHandles(outputs, cleanupList);

                 NativeApiStatus.VerifySuccess(NativeMethods.OrtRun(
@@ -367,14 +367,14 @@ public void Run(
                 throw new ArgumentException($"Length of {nameof(outputNames)} ({outputNames.Count}) must match that of {nameof(outputValues)} ({outputValues.Count}).");
             }

-            using(var cleanupList = new DisposableList<IDisposable>())
+            using (var cleanupList = new DisposableList<IDisposable>())
             {
                 // prepare inputs
-                var inputNamesArray = ConvertNamesToUtf8(inputs, i => i.Name, cleanupList);
+                var inputNamesArray = NativeOnnxValueHelper.ConvertNamesToUtf8(inputs, i => i.Name, cleanupList);
                 var inputValuesArray = GetOrtValuesHandles(inputs, cleanupList);

                 // prepare outputs
-                var outputNamesArray = ConvertNamesToUtf8(outputNames, n => n, cleanupList);
+                var outputNamesArray = NativeOnnxValueHelper.ConvertNamesToUtf8(outputNames, n => n, cleanupList);
                 var outputValuesArray = GetOrtValuesHandles(outputValues, false);

                 NativeApiStatus.VerifySuccess(NativeMethods.OrtRun(
@@ -428,14 +428,14 @@ public void Run(
                 throw new ArgumentException($"Length of {nameof(inputNames)} ({inputNames.Count}) must match that of {nameof(inputValues)} ({inputValues.Count}).");
             }

-            using(var cleanupList = new DisposableList<IDisposable>())
+            using (var cleanupList = new DisposableList<IDisposable>())
             {
                 // prepare inputs
-                var inputNamesArray = ConvertNamesToUtf8(inputNames, n => n, cleanupList);
+                var inputNamesArray = NativeOnnxValueHelper.ConvertNamesToUtf8(inputNames, n => n, cleanupList);
                 var inputValuesArray = GetOrtValuesHandles(inputValues, true);

                 // prepare outputs
-                var outputNamesArray = ConvertNamesToUtf8(outputs, o => o.Name, cleanupList);
+                var outputNamesArray = NativeOnnxValueHelper.ConvertNamesToUtf8(outputs, o => o.Name, cleanupList);
                 var outputValuesArray = GetOrtValuesHandles(outputs, cleanupList);

                 NativeApiStatus.VerifySuccess(NativeMethods.OrtRun(
@@ -515,7 +515,8 @@ public IDisposableReadOnlyCollection<OrtValue> RunWithBindingAnd
                     var ortValue = ortValues.ElementAt(i);
                     result.Add(DisposableNamedOnnxValue.CreateFromOrtValue(outputNames[i], ortValue));
                 }
-            } catch(Exception e)
+            }
+            catch (Exception e)
             {
                 result.Dispose();
                 throw e;
@@ -535,36 +536,12 @@ public string EndProfiling()
             NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionEndProfiling(_nativeHandle,
                                                                                allocator.Pointer,
                                                                                out nameHandle));
-            using(var allocation = new OrtMemoryAllocation(allocator, nameHandle, 0))
+            using (var allocation = new OrtMemoryAllocation(allocator, nameHandle, 0))
             {
                 return NativeOnnxValueHelper.StringFromNativeUtf8(nameHandle);
             }
         }

-        // Delegate for string extraction from an arbitrary input/output object
-        private delegate string NameExtractor<in TInput>(TInput input);
-
-        /// <summary>
-        /// Run helper
-        /// </summary>
-        /// <param name="inputs">names to convert to zero terminated utf8 and pin</param>
-        /// <param name="cleanupList">list to add pinned memory to for later disposal</param>
-        /// <returns></returns>
-        private IntPtr[] ConvertNamesToUtf8<T>(IReadOnlyCollection<T> inputs, NameExtractor<T> extractor,
-            DisposableList<IDisposable> cleanupList)
-        {
-            var result = new IntPtr[inputs.Count];
-            for (int i = 0; i < inputs.Count; ++i)
-            {
-                var name = extractor(inputs.ElementAt(i));
-                var utf8Name = NativeOnnxValueHelper.StringToZeroTerminatedUtf8(name);
-                var pinnedHandle = new PinnedGCHandle(GCHandle.Alloc(utf8Name, GCHandleType.Pinned));
-                result[i] = pinnedHandle.Pointer;
-                cleanupList.Add(pinnedHandle);
-            }
-            return result;
-        }
-
         /// <summary>
         /// This function obtains ortValues for NamedOnnxValue.
         /// The problem with NamedOnnxValue is that it does not contain any Onnx (OrtValue)
@@ -609,8 +586,8 @@ private IntPtr[] GetOrtValuesHandles(IReadOnlyCollection<FixedBufferOnnxValue> v
         }

-        private DisposableList<OrtValue> RunImpl(RunOptions options, IntPtr[] inputNames, IntPtr[] inputValues, IntPtr[] outputNames,
-            DisposableList<IDisposable> cleanupList)
+        private DisposableList<OrtValue> RunImpl(RunOptions options, IntPtr[] inputNames, IntPtr[] inputValues, IntPtr[] outputNames,
+                                                 DisposableList<IDisposable> cleanupList)
         {
             var ortValues = new DisposableList<OrtValue>(outputNames.Length);
             cleanupList.Add(ortValues);
@@ -680,11 +657,11 @@ public ModelMetadata ModelMetadata
        /// </summary>
        public ulong ProfilingStartTimeNs
        {
-          get
-          {
-             return _profilingStartTimeNs;
-          }
-        }
+            get
+            {
+                return _profilingStartTimeNs;
+            }
+        }

        #endregion
@@ -757,8 +734,8 @@ private void InitWithSessionHandle(IntPtr session, SessionOptions options)
                 // set profiling's start time
                 UIntPtr startTime = UIntPtr.Zero;
                 NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionGetProfilingStartTimeNs(_nativeHandle,
-                                                out startTime));
-                _profilingStartTimeNs = (ulong) startTime;
+                                                                                              out startTime));
+                _profilingStartTimeNs = (ulong)startTime;
             }
             catch (OnnxRuntimeException e)
             {
@@ -821,7 +798,7 @@ private string GetOverridableInitializerName(ulong index)
                                                 (UIntPtr)index,
                                                 allocator.Pointer,
                                                 out nameHandle));
-            using(var ortAllocation = new OrtMemoryAllocation(allocator, nameHandle, 0))
+            using (var ortAllocation = new OrtMemoryAllocation(allocator, nameHandle, 0))
             {
                 str = NativeOnnxValueHelper.StringFromNativeUtf8(nameHandle);
             }
@@ -963,7 +940,7 @@ public void Dispose()
        /// <param name="disposing">true if invoked from Dispose() method</param>
        protected virtual void Dispose(bool disposing)
        {
-            if(_disposed)
+            if (_disposed)
             {
                 return;
             }
@@ -1137,7 +1114,7 @@ internal ModelMetadata(InferenceSession session)
                 }

                 // Process each key via the stored key handles
-                foreach(var allocation in ortAllocationKeys)
+                foreach (var allocation in ortAllocationKeys)
                 {
                     IntPtr keyHandle = allocation.Pointer;
                     IntPtr valueHandle = IntPtr.Zero;
@@ -1160,9 +1137,9 @@ internal ModelMetadata(InferenceSession session)
             {
                 // Free ModelMetadata handle
-                NativeMethods.OrtReleaseModelMetadata(modelMetadataHandle);
+                NativeMethods.OrtReleaseModelMetadata(modelMetadataHandle);

-            }
+            }
         }

diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs
index f416fa20082f4..66e1cd45a7f13 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs
@@ -189,6 +189,9 @@ public struct OrtApi
         public IntPtr CreateArenaCfg;
         public IntPtr ReleaseArenaCfg;
         public IntPtr ModelMetadataGetGraphDescription;
+        public IntPtr CreateCUDAProviderOptions;
+        public IntPtr UpdateCUDAProviderOptions;
+        public IntPtr ReleaseCUDAProviderOptions;
     }

     internal static class NativeMethods
@@ -255,6 +258,8 @@ static NativeMethods()
            OrtRegisterCustomOpsLibrary = (DOrtRegisterCustomOpsLibrary)Marshal.GetDelegateForFunctionPointer(api_.RegisterCustomOpsLibrary, typeof(DOrtRegisterCustomOpsLibrary));
            OrtAddSessionConfigEntry = (DOrtAddSessionConfigEntry)Marshal.GetDelegateForFunctionPointer(api_.AddSessionConfigEntry, typeof(DOrtAddSessionConfigEntry));
            OrtAddInitializer = (DOrtAddInitializer)Marshal.GetDelegateForFunctionPointer(api_.AddInitializer, typeof(DOrtAddInitializer));
+           SessionOptionsAppendExecutionProvider_CUDA = (DSessionOptionsAppendExecutionProvider_CUDA)Marshal.GetDelegateForFunctionPointer(
+               api_.SessionOptionsAppendExecutionProvider_CUDA, typeof(DSessionOptionsAppendExecutionProvider_CUDA));

            OrtCreateRunOptions = (DOrtCreateRunOptions)Marshal.GetDelegateForFunctionPointer(api_.CreateRunOptions, typeof(DOrtCreateRunOptions));
            OrtReleaseRunOptions = (DOrtReleaseRunOptions)Marshal.GetDelegateForFunctionPointer(api_.ReleaseRunOptions, typeof(DOrtReleaseRunOptions));
@@ -334,6 +339,10 @@ static NativeMethods()

            OrtGetAvailableProviders = (DOrtGetAvailableProviders)Marshal.GetDelegateForFunctionPointer(api_.GetAvailableProviders, typeof(DOrtGetAvailableProviders));
            OrtReleaseAvailableProviders = (DOrtReleaseAvailableProviders)Marshal.GetDelegateForFunctionPointer(api_.ReleaseAvailableProviders, typeof(DOrtReleaseAvailableProviders));
+
+           OrtCreateCUDAProviderOptions = (DOrtCreateCUDAProviderOptions)Marshal.GetDelegateForFunctionPointer(api_.CreateCUDAProviderOptions, typeof(DOrtCreateCUDAProviderOptions));
+           OrtUpdateCUDAProviderOptions = (DOrtUpdateCUDAProviderOptions)Marshal.GetDelegateForFunctionPointer(api_.UpdateCUDAProviderOptions, typeof(DOrtUpdateCUDAProviderOptions));
+           OrtReleaseCUDAProviderOptions = (DOrtReleaseCUDAProviderOptions)Marshal.GetDelegateForFunctionPointer(api_.ReleaseCUDAProviderOptions, typeof(DOrtReleaseCUDAProviderOptions));
        }

        [DllImport(nativeLib, CharSet = charSet)]
@@ -356,6 +365,37 @@ static NativeMethods()

        #endregion Runtime/Environment API

+       #region Provider Options API
+       /// <summary>
+       /// Creates native OrtCUDAProviderOptions instance
+       /// </summary>
+       /// <param name="cudaProviderOptionsInstance">(output) native instance of OrtCUDAProviderOptions</param>
+       public delegate IntPtr /* OrtStatus* */ DOrtCreateCUDAProviderOptions(
+           out IntPtr /*(OrtCUDAProviderOptions**)*/ cudaProviderOptionsInstance);
+       public static DOrtCreateCUDAProviderOptions OrtCreateCUDAProviderOptions;
+
+       /// <summary>
+       /// Updates native OrtCUDAProviderOptions instance using given key/value pairs
+       /// </summary>
+       /// <param name="cudaProviderOptionsInstance">native instance of OrtCUDAProviderOptions</param>
+       /// <param name="providerOptionsKeys">configuration keys of OrtCUDAProviderOptions</param>
+       /// <param name="providerOptionsValues">configuration values of OrtCUDAProviderOptions</param>
+       /// <param name="numKeys">number of configuration keys</param>
+       public delegate IntPtr /* OrtStatus* */ DOrtUpdateCUDAProviderOptions(
+           IntPtr /*(OrtCUDAProviderOptions*)*/ cudaProviderOptionsInstance,
+           IntPtr[] /*(const char* const *)*/ providerOptionsKeys,
+           IntPtr[] /*(const char* const *)*/ providerOptionsValues,
+           UIntPtr /*(size_t)*/ numKeys);
+       public static DOrtUpdateCUDAProviderOptions OrtUpdateCUDAProviderOptions;
+
+       /// <summary>
+       /// Releases native OrtCUDAProviderOptions instance
+       /// </summary>
+       /// <param name="cudaProviderOptionsInstance">native instance of OrtCUDAProviderOptions to be released</param>
+       public delegate void DOrtReleaseCUDAProviderOptions(IntPtr /*(OrtCUDAProviderOptions*)*/ cudaProviderOptionsInstance);
+       public static DOrtReleaseCUDAProviderOptions OrtReleaseCUDAProviderOptions;
+       #endregion
+
        #region Status API
        public delegate ErrorCode DOrtGetErrorCode(IntPtr /*(OrtStatus*)*/status);
        public static DOrtGetErrorCode OrtGetErrorCode;
@@ -560,6 +600,16 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca

        [DllImport(nativeLib, CharSet = charSet)]
        public static extern IntPtr /*(OrtStatus*)*/ OrtSessionOptionsAppendExecutionProvider_CUDA(IntPtr /*(OrtSessionOptions*) */ options, int device_id);

+       /// <summary>
+       /// Append a CUDA EP instance (configured based on given provider options) to the native OrtSessionOptions instance
+       /// </summary>
+       /// <param name="options">Native OrtSessionOptions instance</param>
+       /// <param name="cudaProviderOptions">Native OrtCUDAProviderOptions instance</param>
+       public delegate IntPtr /*(OrtStatus*)*/ DSessionOptionsAppendExecutionProvider_CUDA(
+           IntPtr /*(OrtSessionOptions*)*/ options,
+           IntPtr /*(const OrtCUDAProviderOptions*)*/ cudaProviderOptions);
+       public static DSessionOptionsAppendExecutionProvider_CUDA SessionOptionsAppendExecutionProvider_CUDA;
+
        [DllImport(nativeLib, CharSet = charSet)]
        public static extern IntPtr /*(OrtStatus*)*/ OrtSessionOptionsAppendExecutionProvider_DML(IntPtr /*(OrtSessionOptions*) */ options, int device_id);

diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxValueHelper.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxValueHelper.cs
index efb619f345d4f..0a1c3d693c238 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxValueHelper.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxValueHelper.cs
@@ -3,6 +3,8 @@

 using Microsoft.ML.OnnxRuntime.Tensors;
 using System;
+using System.Linq;
+using System.Collections.Generic;
 using System.Runtime.InteropServices;
 using System.Text;
@@ -41,7 +43,7 @@ public void Dispose()
        // No need for the finalizer
        // If this is not disposed timely GC can't help us
        #endregion
-    }
+    }

    /// <summary>
    /// This helper class contains methods to create native OrtValue from a managed value object
    /// </summary>
@@ -77,6 +79,32 @@ internal static string StringFromNativeUtf8(IntPtr nativeUtf8)
            Marshal.Copy(nativeUtf8, buffer, 0, len);
            return Encoding.UTF8.GetString(buffer, 0, buffer.Length);
        }
+
+       /// <summary>
+       /// Run helper
+       /// </summary>
+       /// <param name="names">names to convert to zero terminated utf8 and pin</param>
+       /// <param name="extractor">delegate for string extraction from inputs</param>
+       /// <param name="cleanupList">list to add pinned memory to for later disposal</param>
+       /// <returns></returns>
+       internal static IntPtr[] ConvertNamesToUtf8<T>(IReadOnlyCollection<T> names, NameExtractor<T> extractor,
+           DisposableList<IDisposable> cleanupList)
+       {
+           var result = new IntPtr[names.Count];
+           for (int i = 0; i < names.Count; ++i)
+           {
+               var name = extractor(names.ElementAt(i));
+               var utf8Name = NativeOnnxValueHelper.StringToZeroTerminatedUtf8(name);
+               var pinnedHandle = new PinnedGCHandle(GCHandle.Alloc(utf8Name, GCHandleType.Pinned));
+               result[i] = pinnedHandle.Pointer;
+               cleanupList.Add(pinnedHandle);
+           }
+           return result;
+       }
+
+       // Delegate for string extraction from an arbitrary input/output object
+       internal delegate string NameExtractor<in TInput>(TInput input);
    }

 internal static class TensorElementTypeConverter
@@ -84,7 +112,7 @@ internal static class TensorElementTypeConverter
    public static void GetTypeAndWidth(TensorElementType elemType, out Type type, out int width)
    {
        TensorElementTypeInfo result = TensorBase.GetElementTypeInfo(elemType);
-       if(result != null)
+       if (result != null)
        {
            type = result.TensorType;
            width = result.TypeSize;
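For context, `ConvertNamesToUtf8` is moved from `InferenceSession` into `NativeOnnxValueHelper` so the new `ProviderOptions.cs` (below) can reuse it: every caller needs managed strings marshalled into pinned, NUL-terminated UTF-8 buffers before the pointers can cross into native code. A minimal standalone sketch of the same pinning pattern, using only `GCHandle` rather than the internal `PinnedGCHandle`/`DisposableList` helpers:

```csharp
using System;
using System.Runtime.InteropServices;
using System.Text;

static class Utf8NamePinning
{
    // Convert managed strings to pinned, zero-terminated UTF-8 buffers and
    // return the raw pointers plus the handles the caller must later free.
    static IntPtr[] PinNames(string[] names, out GCHandle[] handles)
    {
        var pointers = new IntPtr[names.Length];
        handles = new GCHandle[names.Length];
        for (int i = 0; i < names.Length; ++i)
        {
            byte[] utf8 = Encoding.UTF8.GetBytes(names[i] + '\0'); // zero-terminated
            handles[i] = GCHandle.Alloc(utf8, GCHandleType.Pinned); // pin so the GC cannot move it
            pointers[i] = handles[i].AddrOfPinnedObject();
        }
        return pointers;
    }

    static void Main()
    {
        GCHandle[] handles;
        IntPtr[] ptrs = PinNames(new[] { "input_1", "output_1" }, out handles);
        try
        {
            // ptrs can now be handed to a native call such as OrtRun
            // or OrtUpdateCUDAProviderOptions.
            Console.WriteLine($"Pinned {ptrs.Length} names");
        }
        finally
        {
            foreach (var h in handles) h.Free(); // mirrors the cleanupList disposal
        }
    }
}
```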
diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/ProviderOptions.cs b/csharp/src/Microsoft.ML.OnnxRuntime/ProviderOptions.cs
new file mode 100644
index 0000000000000..ef9763d53c3dc
--- /dev/null
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/ProviderOptions.cs
@@ -0,0 +1,90 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Runtime.InteropServices;
+
+namespace Microsoft.ML.OnnxRuntime
+{
+    /// <summary>
+    /// Holds the options for configuring a CUDA Execution Provider instance
+    /// </summary>
+    public class OrtCUDAProviderOptions : SafeHandle
+    {
+        internal IntPtr Handle
+        {
+            get
+            {
+                return handle;
+            }
+        }
+
+        #region Constructor
+
+        /// <summary>
+        /// Constructs an empty OrtCUDAProviderOptions instance
+        /// </summary>
+        public OrtCUDAProviderOptions() : base(IntPtr.Zero, true)
+        {
+            NativeApiStatus.VerifySuccess(NativeMethods.OrtCreateCUDAProviderOptions(out handle));
+        }
+
+        #endregion
+
+        #region Public Methods
+
+        /// <summary>
+        /// Updates the configuration knobs of OrtCUDAProviderOptions that will eventually be used to configure a CUDA EP
+        /// Please refer to the following on different key/value pairs to configure a CUDA EP and their meaning:
+        /// https://www.onnxruntime.ai/docs/reference/execution-providers/CUDA-ExecutionProvider.html
+        /// </summary>
+        /// <param name="providerOptions">key/value pairs used to configure a CUDA Execution Provider</param>
+        public void UpdateOptions(Dictionary<string, string> providerOptions)
+        {
+            using (var cleanupList = new DisposableList<IDisposable>())
+            {
+                var keysArray = NativeOnnxValueHelper.ConvertNamesToUtf8(providerOptions.Keys.ToArray(), n => n, cleanupList);
+                var valuesArray = NativeOnnxValueHelper.ConvertNamesToUtf8(providerOptions.Values.ToArray(), n => n, cleanupList);
+
+                NativeApiStatus.VerifySuccess(NativeMethods.OrtUpdateCUDAProviderOptions(handle, keysArray, valuesArray, (UIntPtr)providerOptions.Count));
+            }
+        }
+
+        #endregion
+
+        #region Public Properties
+
+        /// <summary>
+        /// Overrides SafeHandle.IsInvalid
+        /// </summary>
+        /// <value>returns true if handle is equal to Zero</value>
+        public override bool IsInvalid { get { return handle == IntPtr.Zero; } }
+
+        #endregion
+
+        #region Private Methods
+
+        #endregion
+
+        #region SafeHandle
+        /// <summary>
+        /// Overrides SafeHandle.ReleaseHandle() to properly dispose of
+        /// the native instance of OrtCUDAProviderOptions
+        /// </summary>
+        /// <returns>always returns true</returns>
+        protected override bool ReleaseHandle()
+        {
+            NativeMethods.OrtReleaseCUDAProviderOptions(handle);
+            handle = IntPtr.Zero;
+            return true;
+        }
+
+        #endregion
+    }
+}

diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs
index 0d4a9a090f334..4f9b1a99f8f71 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs
@@ -118,6 +118,16 @@ public void AppendExecutionProvider_CUDA(int deviceId)
            NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionOptionsAppendExecutionProvider_CUDA(handle, deviceId));
        }

+       /// <summary>
+       /// Append a CUDA EP instance (based on specified configuration) to the SessionOptions instance
+       /// Use only if you have the onnxruntime package specific to this Execution Provider.
+       /// </summary>
+       /// <param name="cudaProviderOptions">CUDA EP provider options to configure the CUDA EP instance</param>
+       public void AppendExecutionProvider_CUDA(OrtCUDAProviderOptions cudaProviderOptions)
+       {
+           NativeApiStatus.VerifySuccess(NativeMethods.SessionOptionsAppendExecutionProvider_CUDA(handle, cudaProviderOptions.Handle));
+       }
+
       /// <summary>
       /// Use only if you have the onnxruntime package specific to this Execution Provider.
       /// </summary>
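Taken together, `ProviderOptions.cs` and the new `SessionOptions` overload give C# callers a string key/value route into the CUDA EP configuration. A minimal caller-side sketch, assuming a CUDA-enabled onnxruntime package and a placeholder model path `model.onnx`:

```csharp
using System.Collections.Generic;
using Microsoft.ML.OnnxRuntime;

class CudaOptionsExample
{
    static void Main()
    {
        // Keys mirror the ones exercised by TestCUDAProviderOptions below;
        // the values shown here are illustrative only.
        var providerOptions = new Dictionary<string, string>
        {
            ["device_id"] = "0",
            ["cuda_mem_limit"] = "2147483648",            // bytes
            ["arena_extend_strategy"] = "kSameAsRequested",
            ["cudnn_conv_algo_search"] = "DEFAULT",
            ["do_copy_in_default_stream"] = "1",
        };

        using (var cudaProviderOptions = new OrtCUDAProviderOptions())
        using (var sessionOptions = new SessionOptions())
        {
            cudaProviderOptions.UpdateOptions(providerOptions);
            sessionOptions.AppendExecutionProvider_CUDA(cudaProviderOptions);

            // "model.onnx" is a hypothetical path.
            using (var session = new InferenceSession("model.onnx", sessionOptions))
            {
                // ... run inference as usual ...
            }
        }
    }
}
```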
@@ -247,7 +257,7 @@ public void AddSessionConfigEntry(string configKey, string configValue)
            using (var pinnedConfigKeyName = new PinnedGCHandle(utf8NameConfigKeyPinned))
            using (var pinnedConfigValueName = new PinnedGCHandle(utf8NameConfigValuePinned))
            {
-               NativeApiStatus.VerifySuccess(NativeMethods.OrtAddSessionConfigEntry(handle, 
+               NativeApiStatus.VerifySuccess(NativeMethods.OrtAddSessionConfigEntry(handle,
                    pinnedConfigKeyName.Pointer, pinnedConfigValueName.Pointer));
            }
        }

diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs
index 54188f26472b1..54d8f91100c8a 100644
--- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs
+++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs
@@ -301,7 +301,7 @@ private void CanRunInferenceOnAModel(GraphOptimizationLevel graphOptimizationLev

                // Run inference with outputs pinned from buffers
                using (var pinnedInputs = new DisposableListTest<FixedBufferOnnxValue>())
-               using(var pinnedOutputs = new DisposableListTest<FixedBufferOnnxValue>())
+               using (var pinnedOutputs = new DisposableListTest<FixedBufferOnnxValue>())
                {
                    var memInfo = OrtMemoryInfo.DefaultInstance; // CPU
@@ -326,7 +326,7 @@ private void CanRunInferenceOnAModel(GraphOptimizationLevel graphOptimizationLev
                        longShape = Array.ConvertAll<int, long>(outputMeta[outputName].Dimensions, d => d);
                        byteSize = longShape.Aggregate(1L, (a, b) => a * b) * sizeof(float);
                        float[] outputBuffer = new float[expectedOutput.Length];
-                       pinnedOutputs.Add(FixedBufferOnnxValue.CreateFromMemory(memInfo, outputBuffer, 
+                       pinnedOutputs.Add(FixedBufferOnnxValue.CreateFromMemory(memInfo, outputBuffer,
                            TensorElementType.Float, longShape, byteSize));

                    session.Run(inputNames, pinnedInputs, outputNames, pinnedOutputs);
@@ -1009,7 +1009,7 @@ private void UnloadLibrary(IntPtr libraryHandle)
        {
            if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
            {
-               if(!FreeLibrary(libraryHandle))
+               if (!FreeLibrary(libraryHandle))
                {
                    throw new Exception("Could not unload the provided shared library using its handle");
                }
@@ -2205,6 +2205,30 @@ private void TestSharedAllocatorUsingCreateAndRegisterAllocator()
            }
        }

+       [Fact]
+       private void TestCUDAProviderOptions()
+       {
+#if USE_CUDA
+           using (var cudaProviderOptions = new OrtCUDAProviderOptions())
+           {
+               var providerOptionsDict = new Dictionary<string, string>();
+               providerOptionsDict["device_id"] = "0";
+               providerOptionsDict["arena_extend_strategy"] = "kSameAsRequested";
+               providerOptionsDict["cuda_mem_limit"] = "200000";
+               providerOptionsDict["cudnn_conv_algo_search"] = "HEURISTIC";
+               providerOptionsDict["do_copy_in_default_stream"] = "1";
+
+               cudaProviderOptions.UpdateOptions(providerOptionsDict);
+
+               using (var sessionOptions = new SessionOptions())
+               {
+                   sessionOptions.AppendExecutionProvider_CUDA(cudaProviderOptions);
+               }
+           }
+#endif
+       }
+
        [DllImport("kernel32", SetLastError = true)]
        static extern IntPtr LoadLibrary(string lpFileName);
@@ -2448,7 +2472,7 @@ static NamedOnnxValue CreateNamedOnnxValueFromRawData<T>(string name, byte[] raw
            T[] typedArr = new T[rawData.Length / elemWidth];
            var typeOf = typeof(T);
-           if(typeOf == typeof(Float16) || typeOf == typeof(BFloat16))
+           if (typeOf == typeof(Float16) || typeOf == typeof(BFloat16))
            {
                using (var memSrcHandle = new Memory<byte>(rawData).Pin())
                using (var memDstHandle = new Memory<T>(typedArr).Pin())

diff --git a/include/onnxruntime/core/framework/cuda_provider_options.h b/include/onnxruntime/core/framework/cuda_provider_options.h
new file mode 100644
index 0000000000000..ee9ecf24ab607
--- /dev/null
+++ b/include/onnxruntime/core/framework/cuda_provider_options.h
@@ -0,0 +1,22 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+typedef enum OrtCudnnConvAlgoSearch {
+  EXHAUSTIVE,  // expensive exhaustive benchmarking using cudnnFindConvolutionForwardAlgorithmEx
+  HEURISTIC,   // lightweight heuristic based search using cudnnGetConvolutionForwardAlgorithm_v7
+  DEFAULT,     // default algorithm using CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
+} OrtCudnnConvAlgoSearch;
+
+/// <summary>
+/// This is CUDA provider specific but needs to live in a header that is build-flavor agnostic
+/// Options for the CUDA provider that are passed to SessionOptionsAppendExecutionProvider_CUDA
+/// </summary>
+typedef struct OrtCUDAProviderOptions {
+  int device_id;                                  // cuda device with id=0 as default device.
+  OrtCudnnConvAlgoSearch cudnn_conv_algo_search;  // cudnn conv algo search option
+  size_t cuda_mem_limit;                          // default cuda memory limitation to maximum finite value of size_t.
+  int arena_extend_strategy;                      // default area extend strategy to kNextPowerOfTwo.
+  int do_copy_in_default_stream;                  // default true
+} OrtCUDAProviderOptions;

diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h
index ab8b65cbd2f00..55e06a7ceea8d 100644
--- a/include/onnxruntime/core/session/onnxruntime_c_api.h
+++ b/include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -165,6 +165,7 @@ ORT_RUNTIME_CLASS(ModelMetadata);
 ORT_RUNTIME_CLASS(ThreadPoolParams);
 ORT_RUNTIME_CLASS(ThreadingOptions);
 ORT_RUNTIME_CLASS(ArenaCfg);
+ORT_RUNTIME_CLASS(CUDAProviderOptions);

 #ifdef _WIN32
 typedef _Return_type_success_(return == 0) OrtStatus* OrtStatusPtr;
@@ -251,23 +252,6 @@ typedef enum OrtMemType {
   OrtMemTypeDefault = 0,  // the default allocator for execution provider
 } OrtMemType;

-typedef enum OrtCudnnConvAlgoSearch {
-  EXHAUSTIVE,  // expensive exhaustive benchmarking using cudnnFindConvolutionForwardAlgorithmEx
-  HEURISTIC,   // lightweight heuristic based search using cudnnGetConvolutionForwardAlgorithm_v7
-  DEFAULT,     // default algorithm using CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
-} OrtCudnnConvAlgoSearch;
-
-/// <summary>
-/// Options for the CUDA provider that are passed to SessionOptionsAppendExecutionProvider_CUDA
-/// </summary>
-typedef struct OrtCUDAProviderOptions {
-  int device_id;                                  // cuda device with id=0 as default device.
-  OrtCudnnConvAlgoSearch cudnn_conv_algo_search;  // cudnn conv algo search option
-  size_t cuda_mem_limit;                          // default cuda memory limitation to maximum finite value of size_t.
-  int arena_extend_strategy;                      // default area extend strategy to kNextPowerOfTwo.
-  int do_copy_in_default_stream;
-} OrtCUDAProviderOptions;
-
 /// <summary>
 /// Options for the OpenVINO provider that are passed to SessionOptionsAppendExecutionProvider_OpenVINO
 /// </summary>
@@ -1146,6 +1130,26 @@ struct OrtApi {
   */
   ORT_API2_STATUS(ModelMetadataGetGraphDescription, _In_ const OrtModelMetadata* model_metadata,
                   _Inout_ OrtAllocator* allocator, _Outptr_ char** value);
+
+  /**
+  * Use this API to create the configuration of a CUDA Execution Provider
+  */
+  ORT_API2_STATUS(CreateCUDAProviderOptions, _Outptr_ OrtCUDAProviderOptions** out);
+
+  /**
+  * Use this API to set the appropriate configuration knobs of a CUDA Execution Provider
+  * Please refer to the following on different key/value pairs to configure a CUDA EP and their meaning:
+  * https://www.onnxruntime.ai/docs/reference/execution-providers/CUDA-ExecutionProvider.html
+  */
+  ORT_API2_STATUS(UpdateCUDAProviderOptions, _Inout_ OrtCUDAProviderOptions* cuda_provider_options,
+                  _In_reads_(num_keys) const char* const* provider_options_keys,
+                  _In_reads_(num_keys) const char* const* provider_options_values,
+                  _In_ size_t num_keys);
+
+  /**
+  * Use this API to release the configuration of a CUDA Execution Provider
+  */
+  ORT_CLASS_RELEASE(CUDAProviderOptions);
 };

 /*

diff --git a/onnxruntime/core/providers/cuda/cuda_execution_provider_info.h b/onnxruntime/core/providers/cuda/cuda_execution_provider_info.h
index 5ba2d07b9cab0..05a32a8de05d8 100644
--- a/onnxruntime/core/providers/cuda/cuda_execution_provider_info.h
+++ b/onnxruntime/core/providers/cuda/cuda_execution_provider_info.h
@@ -8,7 +8,7 @@
 #include "core/framework/arena_extend_strategy.h"
 #include "core/framework/ortdevice.h"
 #include "core/framework/provider_options.h"
-#include "core/session/onnxruntime_c_api.h"
+#include "core/framework/cuda_provider_options.h"

 namespace onnxruntime {
 // Information needed to construct CUDA execution providers.

diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc
index 0a85199e9a3ea..1c3bfeb672da8 100644
--- a/onnxruntime/core/session/onnxruntime_c_api.cc
+++ b/onnxruntime/core/session/onnxruntime_c_api.cc
@@ -6,6 +6,7 @@
 #include "core/session/inference_session_utils.h"
 #include "core/session/IOBinding.h"
 #include "core/framework/allocator.h"
+#include "core/framework/cuda_provider_options.h"
 #include "core/framework/error_code_helper.h"
 #include "core/framework/execution_provider.h"
 #include "core/framework/utils.h"
@@ -37,6 +38,7 @@
 #include "core/platform/ort_mutex.h"
 #ifdef USE_CUDA
 #include "core/providers/cuda/cuda_provider_factory.h"
+#include "core/providers/cuda/cuda_provider_factory_creator.h"
 #endif

 using namespace onnxruntime::logging;
@@ -1852,6 +1854,56 @@ ORT_API(void, OrtApis::ReleaseArenaCfg, _Frees_ptr_opt_ OrtArenaCfg* ptr) {
   delete ptr;
 }

+ORT_API_STATUS_IMPL(OrtApis::CreateCUDAProviderOptions, _Outptr_ OrtCUDAProviderOptions** out) {
+  API_IMPL_BEGIN
+#ifdef USE_CUDA
+  *out = new OrtCUDAProviderOptions();
+  return nullptr;
+#else
+  ORT_UNUSED_PARAMETER(out);
+  return CreateStatus(ORT_FAIL, "CUDA execution provider is not enabled in this build.");
+#endif
+  API_IMPL_END
+}
+
+ORT_API_STATUS_IMPL(OrtApis::UpdateCUDAProviderOptions,
+                    _Inout_ OrtCUDAProviderOptions* cuda_provider_options,
+                    _In_reads_(num_keys) const char* const* provider_options_keys,
+                    _In_reads_(num_keys) const char* const* provider_options_values,
+                    size_t num_keys) {
+  API_IMPL_BEGIN
+#ifdef USE_CUDA
+  ProviderOptions provider_options_map;
+  for (size_t i = 0; i != num_keys; ++i) {
+    if (provider_options_keys[i] == nullptr || provider_options_keys[i][0] == '\0' ||
+        provider_options_values[i] == nullptr || provider_options_values[i][0] == '\0') {
+      return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "key/value cannot be empty");
+    }
+    provider_options_map[provider_options_keys[i]] = provider_options_values[i];
+  }
+
+  auto internal_options = CUDAExecutionProviderInfo::FromProviderOptions(provider_options_map);
+
+  cuda_provider_options->arena_extend_strategy = static_cast<int>(internal_options.arena_extend_strategy);
+  cuda_provider_options->cuda_mem_limit = internal_options.cuda_mem_limit;
+  cuda_provider_options->cudnn_conv_algo_search = internal_options.cudnn_conv_algo_search;
+  cuda_provider_options->device_id = internal_options.device_id;
+  cuda_provider_options->do_copy_in_default_stream = internal_options.do_copy_in_default_stream;
+  return nullptr;
+#else
+  ORT_UNUSED_PARAMETER(cuda_provider_options);
+  ORT_UNUSED_PARAMETER(provider_options_keys);
+  ORT_UNUSED_PARAMETER(provider_options_values);
+  ORT_UNUSED_PARAMETER(num_keys);
+  return CreateStatus(ORT_FAIL, "CUDA execution provider is not enabled in this build.");
+#endif
+  API_IMPL_END
+}
+
+ORT_API(void, OrtApis::ReleaseCUDAProviderOptions, _Frees_ptr_opt_ OrtCUDAProviderOptions* ptr) {
+  delete ptr;
+}
+
 static constexpr OrtApiBase ort_api_base = {
     &OrtApis::GetApi,
     &OrtApis::GetVersionString,
@@ -2086,6 +2138,9 @@ static constexpr OrtApi ort_api_1_to_7 = {

     // Version 7 - In development, feel free to add/remove/rearrange here
     &OrtApis::ModelMetadataGetGraphDescription,
+    &OrtApis::CreateCUDAProviderOptions,
+    &OrtApis::UpdateCUDAProviderOptions,
+    &OrtApis::ReleaseCUDAProviderOptions,
 };

 // Assert to do a limited check to ensure Version 1 of OrtApi never changes (will detect an addition or deletion but not if they cancel out each other)
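Note that on a build without `USE_CUDA`, both `CreateCUDAProviderOptions` and `UpdateCUDAProviderOptions` above return an `ORT_FAIL` status rather than crashing. Through the C# bindings in this change, `NativeApiStatus.VerifySuccess` converts that failed status into an exception inside the `OrtCUDAProviderOptions` constructor, so a caller can probe for CUDA support. A sketch of that pattern (illustrative, not part of the change; it assumes the surfaced exception type is `OnnxRuntimeException`, which is what `NativeApiStatus.VerifySuccess` throws):

```csharp
using System;
using Microsoft.ML.OnnxRuntime;

class CpuOnlyBuildCheck
{
    static void Main()
    {
        try
        {
            // On a CPU-only build, CreateCUDAProviderOptions returns ORT_FAIL,
            // which the constructor turns into an OnnxRuntimeException.
            using (var cudaOptions = new OrtCUDAProviderOptions())
            {
                Console.WriteLine("CUDA EP options created - CUDA-enabled build.");
            }
        }
        catch (OnnxRuntimeException ex)
        {
            Console.WriteLine($"CUDA EP unavailable: {ex.Message}");
        }
    }
}
```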
diff --git a/onnxruntime/core/session/ort_apis.h b/onnxruntime/core/session/ort_apis.h
index a49b785c5a495..a512179245a5c 100644
--- a/onnxruntime/core/session/ort_apis.h
+++ b/onnxruntime/core/session/ort_apis.h
@@ -255,4 +255,12 @@ ORT_API_STATUS_IMPL(SetGlobalDenormalAsZero, _Inout_ OrtThreadingOptions* option
 ORT_API_STATUS_IMPL(CreateArenaCfg, _In_ size_t max_mem, int arena_extend_strategy, int initial_chunk_size_bytes,
                     int max_dead_bytes_per_chunk, _Outptr_ OrtArenaCfg** out);
 ORT_API(void, ReleaseArenaCfg, _Frees_ptr_opt_ OrtArenaCfg*);
+
+ORT_API_STATUS_IMPL(CreateCUDAProviderOptions, _Outptr_ OrtCUDAProviderOptions** out);
+ORT_API_STATUS_IMPL(UpdateCUDAProviderOptions, _Inout_ OrtCUDAProviderOptions* cuda_provider_options,
+                    _In_reads_(num_keys) const char* const* provider_options_keys,
+                    _In_reads_(num_keys) const char* const* provider_options_values,
+                    size_t num_keys);
+ORT_API(void, ReleaseCUDAProviderOptions, _Frees_ptr_opt_ OrtCUDAProviderOptions*);
+
 }  // namespace OrtApis

diff --git a/onnxruntime/test/onnx/main.cc b/onnxruntime/test/onnx/main.cc
index 6b185c206302b..0e6aa20ac7243 100644
--- a/onnxruntime/test/onnx/main.cc
+++ b/onnxruntime/test/onnx/main.cc
@@ -19,6 +19,7 @@
 #include "core/session/onnxruntime_cxx_api.h"
 #include "core/optimizer/graph_transformer_level.h"
 #include "core/framework/session_options.h"
+#include "core/framework/cuda_provider_options.h"
 #include "core/session/onnxruntime_session_options_config_keys.h"

 using namespace onnxruntime;

diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc
index a573f056919f1..1315b8a5b99d2 100644
--- a/onnxruntime/test/perftest/ort_test_session.cc
+++ b/onnxruntime/test/perftest/ort_test_session.cc
@@ -41,13 +41,49 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
 #endif
   } else if (provider_name == onnxruntime::kCudaExecutionProvider) {
 #ifdef USE_CUDA
-    OrtCUDAProviderOptions cuda_options{
-        0,
-        static_cast<OrtCudnnConvAlgoSearch>(performance_test_config.run_config.cudnn_conv_algo),
-        std::numeric_limits<size_t>::max(),
-        0,
-        !performance_test_config.run_config.do_cuda_copy_in_separate_stream};
-    session_options.AppendExecutionProvider_CUDA(cuda_options);
+    const auto& api = Ort::GetApi();
+
+    OrtCUDAProviderOptions* cuda_options;
+    Ort::ThrowOnError(api.CreateCUDAProviderOptions(&cuda_options));
+    std::unique_ptr<OrtCUDAProviderOptions, decltype(api.ReleaseCUDAProviderOptions)> rel_cuda_options(cuda_options, api.ReleaseCUDAProviderOptions);
+
+    std::vector<const char*> cuda_options_keys{"device_id", "arena_extend_strategy", "cuda_mem_limit",
+                                               "cudnn_conv_algo_search", "do_copy_in_default_stream"};
+    std::vector<const char*> cuda_options_values;
+    cuda_options_values.reserve(5);
+
+    // device id
+    cuda_options_values.push_back("0");
+
+    // arena extend strategy
+    cuda_options_values.push_back("kNextPowerOfTwo");
+
+    // cuda mem limit
+    auto size_t_numeric_limits_max_string = std::to_string(std::numeric_limits<size_t>::max());
+    cuda_options_values.push_back(size_t_numeric_limits_max_string.c_str());
+
+    // cudnn conv algo search
+    switch (performance_test_config.run_config.cudnn_conv_algo) {
+      case 0:
+        cuda_options_values.push_back("EXHAUSTIVE");
+        break;
+      case 1:
+        cuda_options_values.push_back("HEURISTIC");
+        break;
+      case 2:
+        cuda_options_values.push_back("DEFAULT");
+        break;
+      default:
+        ORT_THROW("Unsupported value for cudnn_conv_algo. Only 0, 1, 2 are supported. Got: ",
+                  performance_test_config.run_config.cudnn_conv_algo);
+    }
+
+    // do copy in default stream
+    if (!performance_test_config.run_config.do_cuda_copy_in_separate_stream)
+      cuda_options_values.push_back("1");
+    else
+      cuda_options_values.push_back("0");
+
+    Ort::ThrowOnError(api.UpdateCUDAProviderOptions(cuda_options, cuda_options_keys.data(),
+                                                    cuda_options_values.data(), cuda_options_keys.size()));
+
+    session_options.AppendExecutionProvider_CUDA(*cuda_options);
 #else
     ORT_THROW("CUDA is not supported in this build\n");
 #endif

diff --git a/onnxruntime/test/shared_lib/test_inference.cc b/onnxruntime/test/shared_lib/test_inference.cc
index 4efdd116e64a8..2e89b51555b13 100644
--- a/onnxruntime/test/shared_lib/test_inference.cc
+++ b/onnxruntime/test/shared_lib/test_inference.cc
@@ -1192,7 +1192,7 @@ TEST(CApiTest, get_available_providers) {
   char** providers;
   ASSERT_EQ(g_ort->GetAvailableProviders(&providers, &len), nullptr);
   ASSERT_GT(len, 0);
-  ASSERT_STREQ(providers[len-1], "CPUExecutionProvider");
+  ASSERT_STREQ(providers[len - 1], "CPUExecutionProvider");
   ASSERT_EQ(g_ort->ReleaseAvailableProviders(providers, len), nullptr);
 }
@@ -1309,7 +1309,6 @@ TEST(CApiTest, TestSharingOfInitializer) {
                 expected_values_y,
                 nullptr);
 }
-
 #ifndef ORT_NO_RTTI
 TEST(CApiTest, TestIncorrectInputTypeToModel_Tensors) {
   // simple inference test
@@ -1338,6 +1337,7 @@ TEST(CApiTest, TestIncorrectInputTypeToModel_Tensors) {

   ASSERT_TRUE(exception_thrown);
 }
+
 TEST(CApiTest, TestIncorrectInputTypeToModel_SequenceTensors) {
   // simple inference test
   // prepare inputs (incorrect type)
@@ -1371,3 +1371,24 @@ TEST(CApiTest, TestIncorrectInputTypeToModel_SequenceTensors) {
   ASSERT_TRUE(exception_thrown);
 }
 #endif
+
+#ifdef USE_CUDA
+// This test uses CreateCUDAProviderOptions/UpdateCUDAProviderOptions APIs to configure and create
+// a CUDA Execution Provider
+TEST(CApiTest, TestCreatingCUDAProviderOptions) {
+  const auto& api = Ort::GetApi();
+  OrtCUDAProviderOptions* cuda_options;
+  ASSERT_TRUE(api.CreateCUDAProviderOptions(&cuda_options) == nullptr);
+  std::unique_ptr<OrtCUDAProviderOptions, decltype(api.ReleaseCUDAProviderOptions)> rel_cuda_options(cuda_options, api.ReleaseCUDAProviderOptions);
+
+  std::vector<const char*> keys{"device_id", "arena_extend_strategy", "cuda_mem_limit",
+                                "cudnn_conv_algo_search", "do_copy_in_default_stream"};
+  std::vector<const char*> values{"0", "kSameAsRequested", "200000",
+                                  "HEURISTIC", "1"};
+  ASSERT_TRUE(api.UpdateCUDAProviderOptions(cuda_options, keys.data(), values.data(), 5) == nullptr);
+
+  Ort::SessionOptions session_options;
+  ASSERT_TRUE(api.SessionOptionsAppendExecutionProvider_CUDA(static_cast<OrtSessionOptions*>(session_options), cuda_options) == nullptr);
+}
+#endif