Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 81 additions & 0 deletions csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.shared.cs
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,11 @@ public struct OrtApi
public IntPtr SetGlobalCustomJoinThreadFn;
public IntPtr SynchronizeBoundInputs;
public IntPtr SynchronizeBoundOutputs;
public IntPtr SessionOptionsAppendExecutionProvider_CUDA_V2;
public IntPtr CreateCUDAProviderOptions;
public IntPtr UpdateCUDAProviderOptions;
public IntPtr GetCUDAProviderOptionsAsString;
public IntPtr ReleaseCUDAProviderOptions;
}

internal static class NativeMethods
Expand Down Expand Up @@ -397,6 +402,15 @@ static NativeMethods()
OrtUpdateTensorRTProviderOptions = (DOrtUpdateTensorRTProviderOptions)Marshal.GetDelegateForFunctionPointer(api_.UpdateTensorRTProviderOptions, typeof(DOrtUpdateTensorRTProviderOptions));
OrtGetTensorRTProviderOptionsAsString = (DOrtGetTensorRTProviderOptionsAsString)Marshal.GetDelegateForFunctionPointer(api_.GetTensorRTProviderOptionsAsString, typeof(DOrtGetTensorRTProviderOptionsAsString));
OrtReleaseTensorRTProviderOptions = (DOrtReleaseTensorRTProviderOptions)Marshal.GetDelegateForFunctionPointer(api_.ReleaseTensorRTProviderOptions, typeof(DOrtReleaseTensorRTProviderOptions));

SessionOptionsAppendExecutionProvider_CUDA = (DSessionOptionsAppendExecutionProvider_CUDA)Marshal.GetDelegateForFunctionPointer(
api_.SessionOptionsAppendExecutionProvider_CUDA, typeof(DSessionOptionsAppendExecutionProvider_CUDA));
SessionOptionsAppendExecutionProvider_CUDA_V2 = (DSessionOptionsAppendExecutionProvider_CUDA_V2)Marshal.GetDelegateForFunctionPointer(
api_.SessionOptionsAppendExecutionProvider_CUDA_V2, typeof(DSessionOptionsAppendExecutionProvider_CUDA_V2));
OrtCreateCUDAProviderOptions = (DOrtCreateCUDAProviderOptions)Marshal.GetDelegateForFunctionPointer(api_.CreateCUDAProviderOptions, typeof(DOrtCreateCUDAProviderOptions));
OrtUpdateCUDAProviderOptions = (DOrtUpdateCUDAProviderOptions)Marshal.GetDelegateForFunctionPointer(api_.UpdateCUDAProviderOptions, typeof(DOrtUpdateCUDAProviderOptions));
OrtGetCUDAProviderOptionsAsString = (DOrtGetCUDAProviderOptionsAsString)Marshal.GetDelegateForFunctionPointer(api_.GetCUDAProviderOptionsAsString, typeof(DOrtGetCUDAProviderOptionsAsString));
OrtReleaseCUDAProviderOptions = (DOrtReleaseCUDAProviderOptions)Marshal.GetDelegateForFunctionPointer(api_.ReleaseCUDAProviderOptions, typeof(DOrtReleaseCUDAProviderOptions));
}

[DllImport(NativeLib.DllName, CharSet = CharSet.Ansi)]
Expand Down Expand Up @@ -469,6 +483,49 @@ static NativeMethods()
public delegate void DOrtReleaseTensorRTProviderOptions(IntPtr /*(OrtTensorRTProviderOptions*)*/ trtProviderOptionsInstance);
public static DOrtReleaseTensorRTProviderOptions OrtReleaseTensorRTProviderOptions;

/// <summary>
/// Creates a native OrtCUDAProviderOptionsV2 instance (ORT C API CreateCUDAProviderOptions).
/// The returned handle must eventually be freed via DOrtReleaseCUDAProviderOptions
/// (done by OrtCUDAProviderOptions.ReleaseHandle).
/// </summary>
/// <param name="cudaProviderOptionsInstance">(output) native instance of OrtCUDAProviderOptionsV2</param>
/// <returns>native OrtStatus* — null on success; checked via NativeApiStatus.VerifySuccess</returns>
[UnmanagedFunctionPointer(CallingConvention.Winapi)]
public delegate IntPtr /* OrtStatus* */DOrtCreateCUDAProviderOptions(
out IntPtr /*(OrtCUDAProviderOptionsV2**)*/ cudaProviderOptionsInstance);
public static DOrtCreateCUDAProviderOptions OrtCreateCUDAProviderOptions;

/// <summary>
/// Updates a native OrtCUDAProviderOptionsV2 instance using the given key/value pairs.
/// Keys and values are passed as parallel arrays of pointers to null-terminated UTF-8 strings.
/// </summary>
/// <param name="cudaProviderOptionsInstance">native instance of OrtCUDAProviderOptionsV2</param>
/// <param name="providerOptionsKeys">configuration keys of OrtCUDAProviderOptions (UTF-8 string pointers)</param>
/// <param name="providerOptionsValues">configuration values of OrtCUDAProviderOptions (UTF-8 string pointers)</param>
/// <param name="numKeys">number of configuration keys (length of both arrays)</param>
/// <returns>native OrtStatus* — null on success; checked via NativeApiStatus.VerifySuccess</returns>
[UnmanagedFunctionPointer(CallingConvention.Winapi)]
public delegate IntPtr /* OrtStatus* */DOrtUpdateCUDAProviderOptions(
IntPtr /*(OrtCUDAProviderOptionsV2*)*/ cudaProviderOptionsInstance,
IntPtr[] /*(const char* const *)*/ providerOptionsKeys,
IntPtr[] /*(const char* const *)*/ providerOptionsValues,
UIntPtr /*(size_t)*/ numKeys);
public static DOrtUpdateCUDAProviderOptions OrtUpdateCUDAProviderOptions;

/// <summary>
/// Gets the native OrtCUDAProviderOptionsV2 contents as a serialized string.
/// </summary>
/// <param name="cudaProviderOptionsInstance">native instance of OrtCUDAProviderOptionsV2 to serialize</param>
/// <param name="allocator">instance of OrtAllocator used to allocate the output string</param>
/// <param name="ptr">(output) a UTF-8 null terminated string allocated using 'allocator'; caller must free it via the same allocator</param>
/// <returns>native OrtStatus* — null on success; checked via NativeApiStatus.VerifySuccess</returns>
[UnmanagedFunctionPointer(CallingConvention.Winapi)]
public delegate IntPtr /* OrtStatus* */DOrtGetCUDAProviderOptionsAsString(
IntPtr /*(const OrtCUDAProviderOptionsV2*)*/ cudaProviderOptionsInstance,
IntPtr /*(OrtAllocator*)*/ allocator,
out IntPtr /*(char**)*/ptr);
public static DOrtGetCUDAProviderOptionsAsString OrtGetCUDAProviderOptionsAsString;

/// <summary>
/// Releases a native OrtCUDAProviderOptionsV2 instance previously created by
/// DOrtCreateCUDAProviderOptions. After this call the handle is invalid.
/// </summary>
/// <param name="cudaProviderOptionsInstance">native instance of OrtCUDAProviderOptionsV2 to be released</param>
[UnmanagedFunctionPointer(CallingConvention.Winapi)]
public delegate void DOrtReleaseCUDAProviderOptions(IntPtr /*(OrtCUDAProviderOptionsV2*)*/ cudaProviderOptionsInstance);
public static DOrtReleaseCUDAProviderOptions OrtReleaseCUDAProviderOptions;
#endregion

#region Status API
Expand Down Expand Up @@ -820,6 +877,30 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca

public static DSessionOptionsAppendExecutionProvider_TensorRT_V2 SessionOptionsAppendExecutionProvider_TensorRT_V2;

/// <summary>
/// Append a CUDA EP instance (configured based on given provider options) to the native OrtSessionOptions instance.
/// This is the V1 API taking the publicly defined OrtCUDAProviderOptions struct.
/// </summary>
/// <param name="options">Native OrtSessionOptions instance</param>
/// <param name="cudaProviderOptions">Native OrtCUDAProviderOptions instance</param>
/// <returns>native OrtStatus* — null on success; checked via NativeApiStatus.VerifySuccess</returns>
[UnmanagedFunctionPointer(CallingConvention.Winapi)]
public delegate IntPtr /*(OrtStatus*)*/DSessionOptionsAppendExecutionProvider_CUDA(
IntPtr /*(OrtSessionOptions*)*/ options,
IntPtr /*(const OrtCUDAProviderOptions*)*/ cudaProviderOptions);

public static DSessionOptionsAppendExecutionProvider_CUDA SessionOptionsAppendExecutionProvider_CUDA;

/// <summary>
/// Append a CUDA EP instance (configured based on given provider options) to the native OrtSessionOptions instance.
/// This is the V2 API taking an opaque OrtCUDAProviderOptionsV2 handle obtained from CreateCUDAProviderOptions.
/// </summary>
/// <param name="options">Native OrtSessionOptions instance</param>
/// <param name="cudaProviderOptions">Native OrtCUDAProviderOptionsV2 instance</param>
/// <returns>native OrtStatus* — null on success; checked via NativeApiStatus.VerifySuccess</returns>
[UnmanagedFunctionPointer(CallingConvention.Winapi)]
public delegate IntPtr /*(OrtStatus*)*/DSessionOptionsAppendExecutionProvider_CUDA_V2(
IntPtr /*(OrtSessionOptions*)*/ options,
IntPtr /*(const OrtCUDAProviderOptionsV2*)*/ cudaProviderOptions);

public static DSessionOptionsAppendExecutionProvider_CUDA_V2 SessionOptionsAppendExecutionProvider_CUDA_V2;

/// <summary>
/// Free Dimension override (by denotation)
/// </summary>
Expand Down
98 changes: 98 additions & 0 deletions csharp/src/Microsoft.ML.OnnxRuntime/ProviderOptions.shared.cs
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,104 @@ protected override bool ReleaseHandle()
#endregion
}


/// <summary>
/// Holds the options for configuring a CUDA Execution Provider instance.
/// Wraps a native OrtCUDAProviderOptionsV2 handle created via the CreateCUDAProviderOptions C API;
/// the handle is released by SafeHandle disposal/finalization.
/// </summary>
public class OrtCUDAProviderOptions : SafeHandle
{
    /// <summary>
    /// Native OrtCUDAProviderOptionsV2 handle, consumed by interop calls
    /// (e.g. SessionOptionsAppendExecutionProvider_CUDA_V2).
    /// </summary>
    internal IntPtr Handle
    {
        get
        {
            return handle;
        }
    }


    #region Constructor

    /// <summary>
    /// Constructs an empty OrtCUDAProviderOptions instance
    /// </summary>
    public OrtCUDAProviderOptions() : base(IntPtr.Zero, true)
    {
        NativeApiStatus.VerifySuccess(NativeMethods.OrtCreateCUDAProviderOptions(out handle));
    }

    #endregion

    #region Public Methods

    /// <summary>
    /// Get CUDA EP provider options
    /// </summary>
    /// <returns> return C# UTF-16 encoded string </returns>
    public string GetOptions()
    {
        var allocator = OrtAllocator.DefaultInstance;

        // The native call returns a UTF-8 string allocated with 'allocator'; wrap it in an
        // OrtMemoryAllocation so the buffer is freed even if decoding throws.
        IntPtr providerOptions = IntPtr.Zero;
        NativeApiStatus.VerifySuccess(NativeMethods.OrtGetCUDAProviderOptionsAsString(handle, allocator.Pointer, out providerOptions));
        using (var ortAllocation = new OrtMemoryAllocation(allocator, providerOptions, 0))
        {
            return NativeOnnxValueHelper.StringFromNativeUtf8(providerOptions);
        }
    }

    /// <summary>
    /// Updates the configuration knobs of OrtCUDAProviderOptions that will eventually be used to configure a CUDA EP
    /// Please refer to the following on different key/value pairs to configure a CUDA EP and their meaning:
    /// https://www.onnxruntime.ai/docs/reference/execution-providers/CUDA-ExecutionProvider.html
    /// </summary>
    /// <param name="providerOptions">key/value pairs used to configure a CUDA Execution Provider</param>
    /// <exception cref="ArgumentNullException">thrown when providerOptions is null</exception>
    public void UpdateOptions(Dictionary<string, string> providerOptions)
    {
        if (providerOptions == null)
        {
            throw new ArgumentNullException(nameof(providerOptions));
        }

        using (var cleanupList = new DisposableList<IDisposable>())
        {
            // Marshal keys/values to native UTF-8 strings; cleanupList frees the pinned memory.
            var keysArray = NativeOnnxValueHelper.ConvertNamesToUtf8(providerOptions.Keys.ToArray(), n => n, cleanupList);
            var valuesArray = NativeOnnxValueHelper.ConvertNamesToUtf8(providerOptions.Values.ToArray(), n => n, cleanupList);

            NativeApiStatus.VerifySuccess(NativeMethods.OrtUpdateCUDAProviderOptions(handle, keysArray, valuesArray, (UIntPtr)providerOptions.Count));
        }
    }

    #endregion

    #region Public Properties

    /// <summary>
    /// Overrides SafeHandle.IsInvalid
    /// </summary>
    /// <value>returns true if handle is equal to Zero</value>
    public override bool IsInvalid { get { return handle == IntPtr.Zero; } }

    #endregion

    #region SafeHandle
    /// <summary>
    /// Overrides SafeHandle.ReleaseHandle() to properly dispose of
    /// the native instance of OrtCUDAProviderOptions
    /// </summary>
    /// <returns>always returns true</returns>
    protected override bool ReleaseHandle()
    {
        NativeMethods.OrtReleaseCUDAProviderOptions(handle);
        handle = IntPtr.Zero;
        return true;
    }

    #endregion
}


/// <summary>
/// This helper class contains methods to handle values of provider options
/// </summary>
Expand Down
38 changes: 37 additions & 1 deletion csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.shared.cs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,28 @@ public static SessionOptions MakeSessionOptionWithCudaProvider(int deviceId = 0)
return options;
}

/// <summary>
/// A helper method to construct a SessionOptions object for CUDA execution provider.
/// Use only if CUDA is installed and you have the onnxruntime package specific to this Execution Provider.
/// </summary>
/// <param name="cudaProviderOptions">CUDA EP provider options</param>
/// <returns>A SessionsOptions() object configured for execution on provider options</returns>
public static SessionOptions MakeSessionOptionWithCudaProvider(OrtCUDAProviderOptions cudaProviderOptions)
{
    CheckCudaExecutionProviderDLLs();
    var result = new SessionOptions();
    try
    {
        result.AppendExecutionProvider_CUDA(cudaProviderOptions);
    }
    catch (Exception)
    {
        // Ownership never reached the caller; release the native session options before rethrowing.
        result.Dispose();
        throw;
    }
    return result;
}

/// <summary>
/// A helper method to construct a SessionOptions object for TensorRT execution.
/// Use only if CUDA/TensorRT are installed and you have the onnxruntime package specific to this Execution Provider.
Expand Down Expand Up @@ -191,6 +213,20 @@ public void AppendExecutionProvider_CUDA(int deviceId = 0)
#endif
}

/// <summary>
/// Append a CUDA EP instance (based on specified configuration) to the SessionOptions instance.
/// Use only if you have the onnxruntime package specific to this Execution Provider.
/// Routes through the V2 native API (SessionOptionsAppendExecutionProvider_CUDA_V2), consuming the
/// native OrtCUDAProviderOptionsV2 handle held by <paramref name="cudaProviderOptions"/>.
/// </summary>
/// <param name="cudaProviderOptions">CUDA EP provider options</param>
/// <exception cref="NotSupportedException">thrown on mobile builds, where the CUDA EP is unavailable</exception>
public void AppendExecutionProvider_CUDA(OrtCUDAProviderOptions cudaProviderOptions)
{
#if __MOBILE__
throw new NotSupportedException("The CUDA Execution Provider is not supported in this build");
#else
NativeApiStatus.VerifySuccess(NativeMethods.SessionOptionsAppendExecutionProvider_CUDA_V2(handle, cudaProviderOptions.Handle));
#endif
}

/// <summary>
/// Use only if you have the onnxruntime package specific to this Execution Provider.
/// </summary>
Expand Down Expand Up @@ -245,7 +281,7 @@ public void AppendExecutionProvider_Tensorrt(OrtTensorRTProviderOptions trtProvi
#if __MOBILE__
throw new NotSupportedException("The TensorRT Execution Provider is not supported in this build");
#else
NativeApiStatus.VerifySuccess(NativeMethods.SessionOptionsAppendExecutionProvider_TensorRT(handle, trtProviderOptions.Handle));
NativeApiStatus.VerifySuccess(NativeMethods.SessionOptionsAppendExecutionProvider_TensorRT_V2(handle, trtProviderOptions.Handle));
#endif
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,68 @@ public void CanCreateAndDisposeSessionWithModelPath()
}
}

#if USE_CUDA

/// <summary>
/// Verifies that CUDA provider options round-trip through the native serializer and that a
/// session configured with them can run inference.
/// </summary>
[Fact(DisplayName = "TestCUDAProviderOptions")]
private void TestCUDAProviderOptions()
{
    string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet.onnx");

    using (var cleanUp = new DisposableListTest<IDisposable>())
    {
        var cudaProviderOptions = new OrtCUDAProviderOptions();
        cleanUp.Add(cudaProviderOptions);

        var providerOptionsDict = new Dictionary<string, string>
        {
            ["device_id"] = "0",
            ["gpu_mem_limit"] = "20971520",
            ["arena_extend_strategy"] = "kSameAsRequested",
            ["cudnn_conv_algo_search"] = "DEFAULT",
            ["do_copy_in_default_stream"] = "1",
            ["cudnn_conv_use_max_workspace"] = "1",
        };
        cudaProviderOptions.UpdateOptions(providerOptionsDict);

        var resultProviderOptionsDict = new Dictionary<string, string>();
        ProviderOptionsValueHelper.StringToDict(cudaProviderOptions.GetOptions(), resultProviderOptionsDict);

        // test provider options configuration: every key we set must come back with the same value
        foreach (var entry in providerOptionsDict)
        {
            Assert.Equal(entry.Value, resultProviderOptionsDict[entry.Key]);
        }

        // test correctness of provider options
        SessionOptions options = SessionOptions.MakeSessionOptionWithCudaProvider(cudaProviderOptions);
        cleanUp.Add(options);

        var session = new InferenceSession(modelPath, options);
        cleanUp.Add(session);

        var inputMeta = session.InputMetadata;
        var container = new List<NamedOnnxValue>();
        float[] inputData = TestDataLoader.LoadTensorFromFile(@"bench.in"); // this is the data for only one input tensor for this model
        foreach (var name in inputMeta.Keys)
        {
            Assert.Equal(typeof(float), inputMeta[name].ElementType);
            Assert.True(inputMeta[name].IsTensor);
            var tensor = new DenseTensor<float>(inputData, inputMeta[name].Dimensions);
            container.Add(NamedOnnxValue.CreateFromTensor<float>(name, tensor));
        }

        // Run() returns a disposable collection of OrtValues; dispose it so the
        // native output buffers are not leaked (the original left it undisposed).
        using (var results = session.Run(container))
        {
        }
    }
}
#endif

#if USE_TENSORRT
[Fact(DisplayName = "CanRunInferenceOnAModelWithTensorRT")]
private void CanRunInferenceOnAModelWithTensorRT()
Expand Down
2 changes: 1 addition & 1 deletion include/onnxruntime/core/common/string_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#include "core/session/onnxruntime_c_api.h"

namespace onnxruntime {
#ifdef USE_TENSORRT
#if defined(USE_TENSORRT) || defined(USE_CUDA)
static char* StrDup(const std::string& str, _Inout_ OrtAllocator* allocator) {
char* output_string = reinterpret_cast<char*>(allocator->Alloc(allocator, str.size() + 1));
memcpy(output_string, str.c_str(), str.size());
Expand Down
28 changes: 28 additions & 0 deletions include/onnxruntime/core/providers/cuda/cuda_provider_options.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#pragma once

#include "onnxruntime_c_api.h"
#include "core/framework/arena_extend_strategy.h"

/// <summary>
/// Options for the CUDA provider that are passed to SessionOptionsAppendExecutionProvider_CUDA_V2.
/// Please note that this struct is *similar* to OrtCUDAProviderOptions but only to be used internally.
/// Going forward, new cuda provider options are to be supported via this struct and usage of the publicly defined
/// OrtCUDAProviderOptions will be deprecated over time.
/// User can only get the instance of OrtCUDAProviderOptionsV2 via CreateCUDAProviderOptions.
/// NOTE: member order/layout is part of the C API surface; do not reorder fields.
/// </summary>
struct OrtCUDAProviderOptionsV2 {
  int device_id;                                           // cuda device id.
  int has_user_compute_stream;                             // indicator of user specified CUDA compute stream.
  void* user_compute_stream;                               // user specified CUDA compute stream.
  int do_copy_in_default_stream;                           // flag specifying if the default stream is to be used for copying.
  OrtCudnnConvAlgoSearch cudnn_conv_algo_search;           // cudnn algo search enum.
  size_t gpu_mem_limit;                                    // BFC Arena memory limit for CUDA.
                                                           // (will be overridden by contents of `default_memory_arena_cfg` if it exists)
  onnxruntime::ArenaExtendStrategy arena_extend_strategy;  // BFC Arena extension strategy.
                                                           // (will be overridden by contents of `default_memory_arena_cfg` if it exists)
  OrtArenaCfg* default_memory_arena_cfg;                   // BFC Arena config flags.
  int cudnn_conv_use_max_workspace;                        // flag specifying if maximum workspace can be used in cudnn conv algo search.
};
Loading