From b882a42a9430ea54284c554b6fc165f93401100e Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Tue, 30 Mar 2021 03:06:45 -0700 Subject: [PATCH 01/16] Enable TensorRT EP for C# --- .../Microsoft.ML.OnnxRuntime/NativeMethods.cs | 21 ++++ .../SessionOptions.cs | 104 ++++++++++++++++++ .../tensorrt/tensorrt_provider_factory.h | 2 + .../core/providers/tensorrt/symbols.txt | 1 + 4 files changed, 128 insertions(+) diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs index 0df4c77404898..b83a48a3eefa2 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs @@ -192,6 +192,24 @@ public struct OrtApi public IntPtr ModelMetadataGetGraphDescription; } + #region ORT Provider options + [StructLayout(LayoutKind.Sequential)] + public struct OrtTensorRTProviderOptionsNative + { + public int device_id; // cuda device id. + public int has_user_compute_stream; // indicator of user specified CUDA compute stream. + public IntPtr user_compute_stream; // user specified CUDA compute stream. + public int has_trt_options; // override environment variables with following TensorRT settings at runtime. + public UIntPtr trt_max_workspace_size; // maximum workspace size for TensorRT. + public int trt_fp16_enable; // enable TensorRT FP16 precision. Default 0 = false, nonzero = true + public int trt_int8_enable; // enable TensorRT INT8 precision. Default 0 = false, nonzero = true + public IntPtr trt_int8_calibration_table_name; // TensorRT INT8 calibration table name. + public int trt_int8_use_native_calibration_table; // use native TensorRT generated calibration table. Default 0 = false, nonzero = true + } + #endregion + + + internal static class NativeMethods { private const string nativeLib = "onnxruntime"; @@ -574,6 +592,9 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca [DllImport(nativeLib, CharSet = charSet)] public static extern IntPtr /*(OrtStatus*)*/ OrtSessionOptionsAppendExecutionProvider_Tensorrt(IntPtr /*(OrtSessionOptions*)*/ options, int device_id); + [DllImport(nativeLib, CharSet = charSet)] + public static extern IntPtr /*(OrtStatus*)*/ SessionOptionsAppendExecutionProvider_TensorRT(IntPtr /*(OrtSessionOptions*)*/ options, ref OrtTensorRTProviderOptionsNative trt_options); + [DllImport(nativeLib, CharSet = charSet)] public static extern IntPtr /*(OrtStatus*)*/ OrtSessionOptionsAppendExecutionProvider_MIGraphX(IntPtr /*(OrtSessionOptions*)*/ options, int device_id); diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs index 6bc48a0d704da..2c4cc0ba85f00 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs @@ -38,6 +38,7 @@ public class SessionOptions : SafeHandle { // Delay-loaded CUDA or cuDNN DLLs. Currently, delayload is disabled. See cmake/CMakeLists.txt for more information. private static string[] cudaDelayLoadedLibs = { }; + private static string[] trtDelayLoadedLibs = { }; #region Constructor and Factory methods @@ -75,6 +76,63 @@ public static SessionOptions MakeSessionOptionWithCudaProvider(int deviceId = 0) return options; } + /// + /// A helper method to construct a SessionOptions object for TensorRT execution. + /// Use only if CUDA/TensorRT are installed and you have the onnxruntime package specific to this Execution Provider. 
+ /// + /// + /// A SessionsOptions() object configured for execution on deviceId + public static SessionOptions MakeSessionOptionWithTensorrtProvider(int deviceId = 0) + { + CheckTensorrtExecutionProviderDLLs(); + SessionOptions options = new SessionOptions(); + NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionOptionsAppendExecutionProvider_Tensorrt(options.Handle, deviceId)); + NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionOptionsAppendExecutionProvider_CUDA(options.Handle, deviceId)); + NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionOptionsAppendExecutionProvider_CPU(options.Handle, 1)); + return options; + } + + /// + /// A helper method to construct a SessionOptions object for TensorRT execution. + /// Use only if CUDA/TensorRT are installed and you have the onnxruntime package specific to this Execution Provider. + /// + /// + /// A SessionsOptions() object configured for execution on deviceId + /// + public static SessionOptions MakeSessionOptionWithTensorrtProvider(OrtTensorRTProviderOptions trt_options) + { + CheckTensorrtExecutionProviderDLLs(); + SessionOptions options = new SessionOptions(); + + OrtTensorRTProviderOptionsNative trt_options_native; + trt_options_native.device_id = trt_options.device_id; + trt_options_native.has_user_compute_stream = 0; + trt_options_native.user_compute_stream = IntPtr.Zero; + trt_options_native.has_trt_options = trt_options.has_trt_options; + if ((ulong)trt_options.trt_max_workspace_size > (1 << 30)) + { + trt_options_native.trt_max_workspace_size = (UIntPtr)(1 << 30); + } + else + { + trt_options_native.trt_max_workspace_size = trt_options.trt_max_workspace_size; + } + trt_options_native.trt_fp16_enable = trt_options.trt_fp16_enable; + trt_options_native.trt_int8_enable = trt_options.trt_int8_enable; + var tableNamePinned = GCHandle.Alloc(NativeOnnxValueHelper.StringToZeroTerminatedUtf8(trt_options.trt_int8_calibration_table_name), GCHandleType.Pinned); + using (var pinnedSettingsName = new PinnedGCHandle(tableNamePinned)) + { + trt_options_native.trt_int8_calibration_table_name = pinnedSettingsName.Pointer; + } + trt_options_native.trt_int8_use_native_calibration_table = trt_options.trt_int8_use_native_calibration_table; + + + NativeApiStatus.VerifySuccess(NativeMethods.SessionOptionsAppendExecutionProvider_TensorRT(options.Handle, ref trt_options_native)); + NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionOptionsAppendExecutionProvider_CUDA(options.Handle, trt_options.device_id)); + NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionOptionsAppendExecutionProvider_CPU(options.Handle, 1)); + return options; + } + /// /// A helper method to construct a SessionOptions object for Nuphar execution. /// Use only if you have the onnxruntime package specific to this Execution Provider. @@ -592,6 +650,31 @@ public ExecutionMode ExecutionMode } private ExecutionMode _executionMode = ExecutionMode.ORT_SEQUENTIAL; + + /// + /// Provider options for TensorRT. + /// + /// + // Example for setting: + // SessionOptions.OrtTensorRTProviderOptions trt_options; + // trt_options.device_id = 0; + // trt_options.has_trt_options = 1; + // trt_options.trt_max_workspace_size = (UIntPtr) (1<<30); + // trt_options.trt_fp16_enable = 1; + // trt_options.trt_int8_enable = 1; + // trt_options.trt_int8_calibration_table_name = "C:\calibration.flatbuffers"; + // trt_options.trt_int8_use_native_calibration_table = 0; + public struct OrtTensorRTProviderOptions + { + public int device_id; // cuda device id. Default is 0. 
+ public int has_trt_options; // override environment variables with following TensorRT settings at runtime. Default 0 = false, nonzero = true. + public UIntPtr trt_max_workspace_size; // maximum workspace size for TensorRT. ORT C++ DLL has this field to be the type of size_t, hence using UIntPtr for conversion. + public int trt_fp16_enable; // enable TensorRT FP16 precision. Default 0 = false, nonzero = true. + public int trt_int8_enable; // enable TensorRT INT8 precision. Default 0 = false, nonzero = true. + public String trt_int8_calibration_table_name; // TensorRT INT8 calibration table name. + public int trt_int8_use_native_calibration_table; // use native TensorRT generated calibration table. Default 0 = false, nonzero = true + } + #endregion #region Private Methods @@ -624,6 +707,27 @@ private static bool CheckCudaExecutionProviderDLLs() return true; } + private static bool CheckTensorrtExecutionProviderDLLs() + { + if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) + { + foreach (var dll in trtDelayLoadedLibs) + { + IntPtr handle = LoadLibrary(dll); + if (handle != IntPtr.Zero) + continue; + var sysdir = new StringBuilder(String.Empty, 2048); + GetSystemDirectory(sysdir, (uint)sysdir.Capacity); + throw new OnnxRuntimeException( + ErrorCode.NoSuchFile, + $"kernel32.LoadLibrary():'{dll}' not found. TensorRT/CUDA are required for GPU execution. " + + $". Verify it is available in the system directory={sysdir}. Else copy it to the output folder." + ); + } + } + return true; + } + #endregion #region SafeHandle diff --git a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h index 44debc901cb77..237ff72ab0b30 100644 --- a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h +++ b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h @@ -2,12 +2,14 @@ // Licensed under the MIT License. #include "onnxruntime_c_api.h" +#include "core/session/ort_apis.h" #ifdef __cplusplus extern "C" { #endif ORT_API_STATUS(OrtSessionOptionsAppendExecutionProvider_Tensorrt, _In_ OrtSessionOptions* options, int device_id); +ORT_API_STATUS(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT, _In_ OrtSessionOptions* options, _In_ const OrtTensorRTProviderOptions* tensorrt_options); #ifdef __cplusplus } diff --git a/onnxruntime/core/providers/tensorrt/symbols.txt b/onnxruntime/core/providers/tensorrt/symbols.txt index 47950c476c5e8..5e555e98a06f2 100644 --- a/onnxruntime/core/providers/tensorrt/symbols.txt +++ b/onnxruntime/core/providers/tensorrt/symbols.txt @@ -1 +1,2 @@ OrtSessionOptionsAppendExecutionProvider_Tensorrt +SessionOptionsAppendExecutionProvider_TensorRT \ No newline at end of file From fb79c1d39d8d61eb5ca515e51618742d7cf40eaf Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Tue, 30 Mar 2021 03:18:29 -0700 Subject: [PATCH 02/16] Add comment --- csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs index 2c4cc0ba85f00..c143c2fa604b5 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs @@ -96,7 +96,7 @@ public static SessionOptions MakeSessionOptionWithTensorrtProvider(int deviceId /// A helper method to construct a SessionOptions object for TensorRT execution. 
/// Use only if CUDA/TensorRT are installed and you have the onnxruntime package specific to this Execution Provider. /// - /// + /// Provider Options for TensorRT EP. /// A SessionsOptions() object configured for execution on deviceId /// public static SessionOptions MakeSessionOptionWithTensorrtProvider(OrtTensorRTProviderOptions trt_options) From 5328c03f893527faf40b17f5136561d24b816dc1 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Tue, 30 Mar 2021 06:03:49 -0700 Subject: [PATCH 03/16] Fix bug due to build fail --- .../core/providers/tensorrt/tensorrt_provider_factory.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h index 237ff72ab0b30..e587a74f35d61 100644 --- a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h +++ b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h @@ -9,7 +9,6 @@ extern "C" { #endif ORT_API_STATUS(OrtSessionOptionsAppendExecutionProvider_Tensorrt, _In_ OrtSessionOptions* options, int device_id); -ORT_API_STATUS(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT, _In_ OrtSessionOptions* options, _In_ const OrtTensorRTProviderOptions* tensorrt_options); #ifdef __cplusplus } From 5e6e2333e136f6ad179110573bc968d5a69c55d0 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Tue, 30 Mar 2021 06:08:40 -0700 Subject: [PATCH 04/16] Remove unnecessary code --- .../core/providers/tensorrt/tensorrt_provider_factory.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h index e587a74f35d61..44debc901cb77 100644 --- a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h +++ b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h @@ -2,7 +2,6 @@ // Licensed under the MIT License. #include "onnxruntime_c_api.h" -#include "core/session/ort_apis.h" #ifdef __cplusplus extern "C" { From 2074557acf75f0ff5962e2d5fd4ab8ca569bb32e Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Wed, 31 Mar 2021 21:51:55 -0700 Subject: [PATCH 05/16] Fix bug for documentation check --- .../Microsoft.ML.OnnxRuntime/SessionOptions.cs | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs index c143c2fa604b5..9da70ed4cd737 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs @@ -98,7 +98,6 @@ public static SessionOptions MakeSessionOptionWithTensorrtProvider(int deviceId /// /// Provider Options for TensorRT EP. /// A SessionsOptions() object configured for execution on deviceId - /// public static SessionOptions MakeSessionOptionWithTensorrtProvider(OrtTensorRTProviderOptions trt_options) { CheckTensorrtExecutionProviderDLLs(); @@ -654,7 +653,6 @@ public ExecutionMode ExecutionMode /// /// Provider options for TensorRT. 
/// - /// // Example for setting: // SessionOptions.OrtTensorRTProviderOptions trt_options; // trt_options.device_id = 0; @@ -664,15 +662,16 @@ public ExecutionMode ExecutionMode // trt_options.trt_int8_enable = 1; // trt_options.trt_int8_calibration_table_name = "C:\calibration.flatbuffers"; // trt_options.trt_int8_use_native_calibration_table = 0; + public struct OrtTensorRTProviderOptions { - public int device_id; // cuda device id. Default is 0. - public int has_trt_options; // override environment variables with following TensorRT settings at runtime. Default 0 = false, nonzero = true. - public UIntPtr trt_max_workspace_size; // maximum workspace size for TensorRT. ORT C++ DLL has this field to be the type of size_t, hence using UIntPtr for conversion. - public int trt_fp16_enable; // enable TensorRT FP16 precision. Default 0 = false, nonzero = true. - public int trt_int8_enable; // enable TensorRT INT8 precision. Default 0 = false, nonzero = true. - public String trt_int8_calibration_table_name; // TensorRT INT8 calibration table name. - public int trt_int8_use_native_calibration_table; // use native TensorRT generated calibration table. Default 0 = false, nonzero = true + public int device_id; //!< cuda device id. Default is 0. + public int has_trt_options; //!< override environment variables with following TensorRT settings at runtime. Default 0 = false, nonzero = true. + public UIntPtr trt_max_workspace_size; //!< maximum workspace size for TensorRT. ORT C++ DLL has this field to be the type of size_t, hence using UIntPtr for conversion. + public int trt_fp16_enable; //!< enable TensorRT FP16 precision. Default 0 = false, nonzero = true. + public int trt_int8_enable; //!< enable TensorRT INT8 precision. Default 0 = false, nonzero = true. + public String trt_int8_calibration_table_name; //!< TensorRT INT8 calibration table name. + public int trt_int8_use_native_calibration_table; //!< use native TensorRT generated calibration table. 
Default 0 = false, nonzero = true } #endregion From f016bc451214d8ce9d533a9615c8805aa4f1f531 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Thu, 1 Apr 2021 08:22:54 -0700 Subject: [PATCH 06/16] Add test cases --- .../InferenceTest.cs | 51 +++++++++++++++++- .../squeezenet_calibration.flatbuffers | Bin 0 -> 4108 bytes 2 files changed, 49 insertions(+), 2 deletions(-) create mode 100644 csharp/testdata/squeezenet_calibration.flatbuffers diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs index ad401aba1d6ac..83260691c898f 100644 --- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs +++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs @@ -227,6 +227,52 @@ public void CanCreateAndDisposeSessionWithModelPath() } } + [Fact] + private void validateProviderOptions() + { + string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet.onnx"); + +#if USE_TENSORRT + string calTablPath = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet_calibration.flatbuffers"); + //Environment.SetEnvironmentVariable("ORT_TENSORRT_ENGINE_CACHE_ENABLE", "1"); + + SessionOptions.OrtTensorRTProviderOptions trt_options; + trt_options.device_id = 0; + trt_options.trt_int8_calibration_table_name = calTablPath; + trt_options.has_trt_options = 1; + trt_options.trt_max_workspace_size = (UIntPtr)(1 << 30); + trt_options.trt_fp16_enable = 1; + trt_options.trt_int8_enable = 1; + trt_options.trt_int8_use_native_calibration_table = 0; + + var session = new InferenceSession(modelPath, SessionOptions.MakeSessionOptionWithTensorrtProvider(trt_options)); + var inputMeta = session.InputMetadata; + var container = new List(); + float[] inputData = LoadTensorFromFile(@"bench.in"); // this is the data for only one input tensor for this model + foreach (var name in inputMeta.Keys) + { + Assert.Equal(typeof(float), inputMeta[name].ElementType); + Assert.True(inputMeta[name].IsTensor); + var tensor = new DenseTensor(inputData, inputMeta[name].Dimensions); + container.Add(NamedOnnxValue.CreateFromTensor(name, tensor)); + } + + using (var results = session.Run(container)) + { + // Following code is temporarily commented. + // Even though we enable fp16 or int8 through provider options, it could be disabled from TRT EP due to GPU not supporting fp16 or int8. + // Once From/ToProviderOptions() has been implemented in TRT EP, better test cases will be added. 
+ /* + string[] files = Directory.GetFiles(Directory.GetCurrentDirectory(), "*int8*.engine"); + Assert.True(files.Any()); + files = Directory.GetFiles(Directory.GetCurrentDirectory(), "*fp16*.engine"); + Assert.True(files.Any()); + */ + } +#endif + + } + [Theory] [InlineData(GraphOptimizationLevel.ORT_DISABLE_ALL, true)] [InlineData(GraphOptimizationLevel.ORT_DISABLE_ALL, false)] @@ -2349,6 +2395,7 @@ private void VerifyNativeMethodsExist() #endif #if USE_TENSORRT ,"OrtSessionOptionsAppendExecutionProvider_Tensorrt" + ,"SessionOptionsAppendExecutionProvider_TensorRT" #endif #if USE_MIGRAPHX ,"OrtSessionOptionsAppendExecutionProvider_MIGraphX" @@ -2727,7 +2774,7 @@ internal class DisposableListTest : List, IDisposableReadOnlyCollection public DisposableListTest() { } public DisposableListTest(int count) : base(count) { } - #region IDisposable Support +#region IDisposable Support private bool disposedValue = false; // To detect redundant calls protected virtual void Dispose(bool disposing) @@ -2760,6 +2807,6 @@ public void Dispose() Dispose(true); GC.SuppressFinalize(this); } - #endregion +#endregion } } diff --git a/csharp/testdata/squeezenet_calibration.flatbuffers b/csharp/testdata/squeezenet_calibration.flatbuffers new file mode 100644 index 0000000000000000000000000000000000000000..e5cad768f4fe100a2c431aa8251ea85d085951c1 GIT binary patch literal 4108 zcmbVPJy4xR6kQb+6&-X^q@YNV!h%`$?(W^6j&UqfSWu*($P8fuPMl!^Dmsiqk%E#k zg#|^56e&`qNMS)?ks<{pMT!&@DN>|SQBhGm_l4!{+Yd&356;K%y*+z>&hEXtV@f`6 zsZlk8r@~XIPk_nBw zHfFM-Q%D|HRH1MC2BN+ntXiW8e_*?<=(g+ykBg;~!uyU>4W_wt<b@%Ih|w3F*=`|jlqE+{)#_jK)~n1&x0|zq~SUnIF>2oY%xaW`uvNp7W35C zUw*T=_}wC{(Hy>*8I}vnn+?}SO~x4F;Em-JGi*vuU)fz;TbaML;MSa&RZkkO2R9ie zhSSc3;LqSnz$3r(tHQ&2$0N>3&>Gx_c&ZN-EggeATdI4YqRxm;dHLpU9Y$@ z|7r1JNgc;~WBJyN`Be$_(J(w1eA)1lrksPz&KegTJ6UTIzvMSHx_-Rh&yo0gZZ7OD zR9uwXRqS+eD)eOKN^>?14Bw4;+8zsT*=eKK|fv@<2Cw= zg)%n6wl}BQE&9wh+@Q&4CfIcWtBa*iwRVLocJDOY)8Z}qgi4>C4lgJ2o+Da-lVXFp zi$2`wGy2D1tVy%+*`fsyQ(~@b;{X>9f7;=8dwz+P8+~p!T(q8$EPjtmvaRw|b0tsP z4cBYNZjBj>0*xn<)Qi?`(dVGUP1rs$;oN~OG`OaU-J*~Gv&~gE-l7jz`Yctv-daSh z@X=r$TF%65u_%f@zR~CYuv{eSd6ydqrzMX)5Nu8fi7I*WjXszD>c`sinQ#_D!;H+v z;7n(${Zn&ApKBFY=4b6vOx6*IoLsK!YT*`rz8{h+c6Tf8ah^Nz7JYoB&)8u<)>r-} z)N+>@rx?d6$@#13;~IUg49kVX8(nVa3@rXg4{?n?KUUmvjLF)e%H(7Mey|nr>O|X` zD_ Date: Thu, 1 Apr 2021 08:27:18 -0700 Subject: [PATCH 07/16] restore some changes --- csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs index 83260691c898f..21eb28dc6b6b2 100644 --- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs +++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs @@ -2774,7 +2774,7 @@ internal class DisposableListTest : List, IDisposableReadOnlyCollection public DisposableListTest() { } public DisposableListTest(int count) : base(count) { } -#region IDisposable Support + #region IDisposable Support private bool disposedValue = false; // To detect redundant calls protected virtual void Dispose(bool disposing) @@ -2807,6 +2807,6 @@ public void Dispose() Dispose(true); GC.SuppressFinalize(this); } -#endregion + #endregion } } From 767083f1468993923b222a86b67d7a03f8d6b27a Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Mon, 5 Apr 2021 01:08:15 -0700 Subject: [PATCH 08/16] fix CI build bug --- .../core/providers/tensorrt/tensorrt_provider_factory.h | 1 + 1 file changed, 1 insertion(+) diff --git 
a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h index 44debc901cb77..e8d6aff9feb9d 100644 --- a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h +++ b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h @@ -8,6 +8,7 @@ extern "C" { #endif ORT_API_STATUS(OrtSessionOptionsAppendExecutionProvider_Tensorrt, _In_ OrtSessionOptions* options, int device_id); +ORT_API_STATUS(SessionOptionsAppendExecutionProvider_TensorRT, _In_ OrtSessionOptions* options, int device_id); #ifdef __cplusplus } From 7f3a544ac4edbbdbbc619376c258cc37b9390797 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Thu, 8 Apr 2021 20:25:15 -0700 Subject: [PATCH 09/16] expose all tensorrt env provider options --- .../Microsoft.ML.OnnxRuntime/NativeMethods.cs | 5 +++ .../SessionOptions.cs | 40 ++++++++++++++++++- .../InferenceTest.cs | 12 +++--- .../core/session/onnxruntime_c_api.h | 31 ++++++++------ .../python/onnxruntime_pybind_state.cc | 2 +- onnxruntime/test/onnx/main.cc | 7 +++- onnxruntime/test/perftest/ort_test_session.cc | 10 +++++ onnxruntime/test/util/default_providers.cc | 2 +- 8 files changed, 86 insertions(+), 23 deletions(-) diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs index b83a48a3eefa2..815c365d321a2 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs @@ -205,6 +205,11 @@ public struct OrtTensorRTProviderOptionsNative public int trt_int8_enable; // enable TensorRT INT8 precision. Default 0 = false, nonzero = true public IntPtr trt_int8_calibration_table_name; // TensorRT INT8 calibration table name. public int trt_int8_use_native_calibration_table; // use native TensorRT generated calibration table. Default 0 = false, nonzero = true + public int trt_max_partition_iterations; // maximum number of iterations allowed in model partitioning for TensorRT. + public int trt_min_subgraph_size; // minimum node size in a subgraph after partitioning. + public int trt_dump_subgraphs; // dump the subgraphs that are transformed into TRT engines in onnx format to the filesystem. Default 0 = false, nonzero = true + public int trt_engine_cache_enable; // enable TensorRT engine caching. Default 0 = false, nonzero = true + public IntPtr trt_cache_path; // specify path for TensorRT engine and profile files if engine_cache_enable is enabled, or INT8 calibration table file if trt_int8_enable is enabled. 
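 // Note: field order and types must stay in sync with OrtTensorRTProviderOptions in onnxruntime_c_api.h (the struct is marshaled with LayoutKind.Sequential).
 // The IntPtr string fields are filled from pinned, zero-terminated UTF-8 buffers (see MakeSessionOptionWithTensorrtProvider in SessionOptions.cs), and UIntPtr stands in for the native size_t.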
} #endregion diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs index 9da70ed4cd737..c35aefca8d233 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs @@ -124,6 +124,15 @@ public static SessionOptions MakeSessionOptionWithTensorrtProvider(OrtTensorRTPr trt_options_native.trt_int8_calibration_table_name = pinnedSettingsName.Pointer; } trt_options_native.trt_int8_use_native_calibration_table = trt_options.trt_int8_use_native_calibration_table; + trt_options_native.trt_max_partition_iterations = trt_options.trt_max_partition_iterations; + trt_options_native.trt_min_subgraph_size = trt_options.trt_min_subgraph_size; + trt_options_native.trt_dump_subgraphs = trt_options.trt_dump_subgraphs; + trt_options_native.trt_engine_cache_enable = trt_options.trt_engine_cache_enable; + var cachePathPinned = GCHandle.Alloc(NativeOnnxValueHelper.StringToZeroTerminatedUtf8(trt_options.trt_cache_path), GCHandleType.Pinned); + using (var pinnedSettingsName2 = new PinnedGCHandle(cachePathPinned)) + { + trt_options_native.trt_cache_path = pinnedSettingsName2.Pointer; + } NativeApiStatus.VerifySuccess(NativeMethods.SessionOptionsAppendExecutionProvider_TensorRT(options.Handle, ref trt_options_native)); @@ -382,6 +391,29 @@ public void AddFreeDimensionOverrideByName(string dimName, long dimValue) NativeApiStatus.VerifySuccess(NativeMethods.OrtAddFreeDimensionOverrideByName(handle, pinnedDimName.Pointer, dimValue)); } } + + /// + /// Get TensorRT provider options with default setting. + /// + /// TRT provider options instance. + public static OrtTensorRTProviderOptions GetDefaultTensorRTProviderOptions() + { + OrtTensorRTProviderOptions trt_options; + trt_options.device_id = 0; + trt_options.has_trt_options = 0; + trt_options.trt_max_workspace_size = (UIntPtr)(1 << 30); + trt_options.trt_fp16_enable = 0; + trt_options.trt_int8_enable = 0; + trt_options.trt_int8_calibration_table_name = ""; + trt_options.trt_int8_use_native_calibration_table = 0; + trt_options.trt_max_partition_iterations = 1000; + trt_options.trt_min_subgraph_size = 1; + trt_options.trt_dump_subgraphs = 0; + trt_options.trt_engine_cache_enable = 0; + trt_options.trt_cache_path = ""; + + return trt_options; + } #endregion internal IntPtr Handle @@ -660,9 +692,8 @@ public ExecutionMode ExecutionMode // trt_options.trt_max_workspace_size = (UIntPtr) (1<<30); // trt_options.trt_fp16_enable = 1; // trt_options.trt_int8_enable = 1; - // trt_options.trt_int8_calibration_table_name = "C:\calibration.flatbuffers"; + // trt_options.trt_int8_calibration_table_name = "calibration.flatbuffers"; // trt_options.trt_int8_use_native_calibration_table = 0; - public struct OrtTensorRTProviderOptions { public int device_id; //!< cuda device id. Default is 0. @@ -672,6 +703,11 @@ public struct OrtTensorRTProviderOptions public int trt_int8_enable; //!< enable TensorRT INT8 precision. Default 0 = false, nonzero = true. public String trt_int8_calibration_table_name; //!< TensorRT INT8 calibration table name. public int trt_int8_use_native_calibration_table; //!< use native TensorRT generated calibration table. Default 0 = false, nonzero = true + public int trt_max_partition_iterations; //!< maximum number of iterations allowed in model partitioning for TensorRT. + public int trt_min_subgraph_size; //!< minimum node size in a subgraph after partitioning. 
+ public int trt_dump_subgraphs; //!< dump the subgraphs that are transformed into TRT engines in onnx format to the filesystem. Default 0 = false, nonzero = true + public int trt_engine_cache_enable; //!< enable TensorRT engine caching. Default 0 = false, nonzero = true + public String trt_cache_path; //!< specify path for TensorRT engine and profile files if engine_cache_enable is enabled, or INT8 calibration table file if trt_int8_enable is enabled. } #endregion diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs index 21eb28dc6b6b2..bae0239765c8b 100644 --- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs +++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs @@ -227,16 +227,17 @@ public void CanCreateAndDisposeSessionWithModelPath() } } + + +#if USE_TENSORRT [Fact] - private void validateProviderOptions() + private void validateTensorRTProviderOptions() { string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet.onnx"); - -#if USE_TENSORRT string calTablPath = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet_calibration.flatbuffers"); //Environment.SetEnvironmentVariable("ORT_TENSORRT_ENGINE_CACHE_ENABLE", "1"); - SessionOptions.OrtTensorRTProviderOptions trt_options; + SessionOptions.OrtTensorRTProviderOptions trt_options = SessionOptions.GetDefaultTensorRTProviderOptions(); trt_options.device_id = 0; trt_options.trt_int8_calibration_table_name = calTablPath; trt_options.has_trt_options = 1; @@ -257,6 +258,7 @@ private void validateProviderOptions() container.Add(NamedOnnxValue.CreateFromTensor(name, tensor)); } + using (var results = session.Run(container)) { // Following code is temporarily commented. @@ -269,9 +271,9 @@ private void validateProviderOptions() Assert.True(files.Any()); */ } + } #endif - } [Theory] [InlineData(GraphOptimizationLevel.ORT_DISABLE_ALL, true)] diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index df0b1c221a1f3..db32dfb409b52 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -279,25 +279,30 @@ typedef struct OrtCUDAProviderOptions { /// Options for the ROCM provider that are passed to SessionOptionsAppendExecutionProvider_ROCM /// typedef struct OrtROCMProviderOptions { - int device_id; // hip device with id=0 as default device. - int miopen_conv_exhaustive_search; // miopen conv algo exhaustive search option - size_t hip_mem_limit; // default hip memory limitation to maximum finite value of size_t. - int arena_extend_strategy; // default area extend strategy to KNextPowerOfTwo. + int device_id; // hip device with id=0 as default device. + int miopen_conv_exhaustive_search; // miopen conv algo exhaustive search option + size_t hip_mem_limit; // default hip memory limitation to maximum finite value of size_t. + int arena_extend_strategy; // default area extend strategy to KNextPowerOfTwo. } OrtROCMProviderOptions; /// /// Options for the TensorRT provider that are passed to SessionOptionsAppendExecutionProvider_TensorRT /// typedef struct OrtTensorRTProviderOptions { - int device_id; // cuda device id. - int has_user_compute_stream; // indicator of user specified CUDA compute stream. - void* user_compute_stream; // user specified CUDA compute stream. - int has_trt_options; // override environment variables with following TensorRT settings at runtime. 
- size_t trt_max_workspace_size; // maximum workspace size for TensorRT. - int trt_fp16_enable; // enable TensorRT FP16 precision. Default 0 = false, nonzero = true - int trt_int8_enable; // enable TensorRT INT8 precision. Default 0 = false, nonzero = true - const char* trt_int8_calibration_table_name; // TensorRT INT8 calibration table name. - int trt_int8_use_native_calibration_table; // use native TensorRT generated calibration table. Default 0 = false, nonzero = true + int device_id; // cuda device id. + int has_user_compute_stream; // indicator of user specified CUDA compute stream. + void* user_compute_stream; // user specified CUDA compute stream. + int has_trt_options; // override environment variables with following TensorRT settings at runtime. + size_t trt_max_workspace_size; // maximum workspace size for TensorRT. + int trt_fp16_enable; // enable TensorRT FP16 precision. Default 0 = false, nonzero = true + int trt_int8_enable; // enable TensorRT INT8 precision. Default 0 = false, nonzero = true + const char* trt_int8_calibration_table_name; // TensorRT INT8 calibration table name. + int trt_int8_use_native_calibration_table; // use native TensorRT generated calibration table. Default 0 = false, nonzero = true + int max_partition_iterations; // maximum number of iterations allowed in model partitioning for TensorRT. + int min_subgraph_size; // minimum node size in a subgraph after partitioning. + int dump_subgraphs; // dump the subgraphs that are transformed into TRT engines in onnx format to the filesystem. Default 0 = false, nonzero = true + int engine_cache_enable; // enable TensorRT engine caching. Default 0 = false, nonzero = true + const char* cache_path; // specify path for TensorRT engine and profile files if engine_cache_enable is enabled, or INT8 calibration table file if trt_int8_enable is enabled. 
} OrtTensorRTProviderOptions; /// diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index 5a26cfd4de673..ba128571c717a 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -494,7 +494,7 @@ static void RegisterExecutionProviders(InferenceSession* sess, const std::vector sess->GetSessionOptions().enable_cpu_mem_arena)); } else if (type == kTensorrtExecutionProvider) { #ifdef USE_TENSORRT - OrtTensorRTProviderOptions params{0, 0, nullptr, 0, 1 << 30, 0, 0, nullptr, 0}; + OrtTensorRTProviderOptions params{0, 0, nullptr, 0, 1 << 30, 0, 0, nullptr, 0, 1000, 1, 0, 0, nullptr}; std::string trt_int8_calibration_table_name; auto it = provider_options_map.find(type); if (it != provider_options_map.end()) { diff --git a/onnxruntime/test/onnx/main.cc b/onnxruntime/test/onnx/main.cc index dd799ac65570c..e71a72459ed78 100644 --- a/onnxruntime/test/onnx/main.cc +++ b/onnxruntime/test/onnx/main.cc @@ -318,7 +318,12 @@ int real_main(int argc, char* argv[], Ort::Env& env) { 0, 0, nullptr, - 0}; ++ 0, ++ 1000, ++ 1, ++ 0, ++ 0, ++ nullptr}; OrtCUDAProviderOptions cuda_options{ 0, diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index 7e95baf8d1e3b..2a7f29bc85583 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -68,6 +68,11 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device bool trt_int8_enable = false; std::string trt_int8_calibration_table_name = ""; bool trt_int8_use_native_calibration_table = false; + int trt_max_partition_iterations = 1000; + int trt_min_subgraph_size = 1; + bool trt_dump_subgraphs = false; + bool trt_engine_cache_enable = false; + std::string trt_cache_path = ""; #ifdef _MSC_VER std::string ov_string = ToMBString(performance_test_config.run_config.ep_runtime_config_string); @@ -145,6 +150,11 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device tensorrt_options.trt_int8_enable = trt_int8_enable; tensorrt_options.trt_int8_calibration_table_name = trt_int8_calibration_table_name.c_str(); tensorrt_options.trt_int8_use_native_calibration_table = trt_int8_use_native_calibration_table; + tensorrt_options.trt_max_partition_iterations = trt_max_partition_iterations; + tensorrt_options.trt_min_subgraph_size = trt_min_subgraph_size; + tensorrt_options.trt_dump_subgraphs = trt_dump_subgraphs; + tensorrt_options.trt_engine_cache_enable = trt_engine_cache_enable; + tensorrt_options.trt_cache_path = trt_cache_path.c_str(); session_options.AppendExecutionProvider_TensorRT(tensorrt_options); OrtCUDAProviderOptions cuda_options{ diff --git a/onnxruntime/test/util/default_providers.cc b/onnxruntime/test/util/default_providers.cc index 7cb8e4c216f90..7e455bd0f4d46 100644 --- a/onnxruntime/test/util/default_providers.cc +++ b/onnxruntime/test/util/default_providers.cc @@ -43,7 +43,7 @@ std::unique_ptr DefaultCpuExecutionProvider(bool enable_aren std::unique_ptr DefaultTensorrtExecutionProvider() { #ifdef USE_TENSORRT - OrtTensorRTProviderOptions params{0, 0, nullptr, 0, 1 << 30, 0, 0, nullptr, 0}; + OrtTensorRTProviderOptions params{0, 0, nullptr, 0, 1 << 30, 0, 0, nullptr, 0, 1000, 1, 0, 0, nullptr}; if (auto factory = CreateExecutionProviderFactory_Tensorrt(¶ms)) return factory->CreateProvider(); #endif From 9af54d0524fddf65008886f8ff461f9136ddd9df Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Thu, 8 Apr 
2021 21:05:35 -0700 Subject: [PATCH 10/16] fix typos --- onnxruntime/test/onnx/main.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/onnxruntime/test/onnx/main.cc b/onnxruntime/test/onnx/main.cc index e71a72459ed78..ee29c8d6f728c 100644 --- a/onnxruntime/test/onnx/main.cc +++ b/onnxruntime/test/onnx/main.cc @@ -318,12 +318,12 @@ int real_main(int argc, char* argv[], Ort::Env& env) { 0, 0, nullptr, -+ 0, -+ 1000, -+ 1, -+ 0, -+ 0, -+ nullptr}; + 0, + 1000, + 1, + 0, + 0, + nullptr}; OrtCUDAProviderOptions cuda_options{ 0, From ee998f86b73fd864a258f93a2f896b61df4723f6 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Thu, 8 Apr 2021 21:48:58 -0700 Subject: [PATCH 11/16] Fix bug --- include/onnxruntime/core/session/onnxruntime_c_api.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index e152de59c8ae6..7d892aacd8b4b 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -298,11 +298,11 @@ typedef struct OrtTensorRTProviderOptions { int trt_int8_enable; // enable TensorRT INT8 precision. Default 0 = false, nonzero = true const char* trt_int8_calibration_table_name; // TensorRT INT8 calibration table name. int trt_int8_use_native_calibration_table; // use native TensorRT generated calibration table. Default 0 = false, nonzero = true - int max_partition_iterations; // maximum number of iterations allowed in model partitioning for TensorRT. - int min_subgraph_size; // minimum node size in a subgraph after partitioning. - int dump_subgraphs; // dump the subgraphs that are transformed into TRT engines in onnx format to the filesystem. Default 0 = false, nonzero = true - int engine_cache_enable; // enable TensorRT engine caching. Default 0 = false, nonzero = true - const char* cache_path; // specify path for TensorRT engine and profile files if engine_cache_enable is enabled, or INT8 calibration table file if trt_int8_enable is enabled. + int trt_max_partition_iterations; // maximum number of iterations allowed in model partitioning for TensorRT. + int trt_min_subgraph_size; // minimum node size in a subgraph after partitioning. + int trt_dump_subgraphs; // dump the subgraphs that are transformed into TRT engines in onnx format to the filesystem. Default 0 = false, nonzero = true + int trt_engine_cache_enable; // enable TensorRT engine caching. Default 0 = false, nonzero = true + const char* trt_cache_path; // specify path for TensorRT engine and profile files if engine_cache_enable is enabled, or INT8 calibration table file if trt_int8_enable is enabled. 
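  // Note: keep the field order and types here in sync with the managed OrtTensorRTProviderOptionsNative mirror in csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs, which marshals this struct with a sequential layout.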
} OrtTensorRTProviderOptions; /// From 58f2b2f65cbe99f42a08c25dc16993fe2a2dd468 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 9 Apr 2021 01:10:45 -0700 Subject: [PATCH 12/16] fix minor define issue --- .../core/providers/tensorrt/tensorrt_provider_factory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h index e8d6aff9feb9d..9aa5f37ad7010 100644 --- a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h +++ b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h @@ -8,7 +8,7 @@ extern "C" { #endif ORT_API_STATUS(OrtSessionOptionsAppendExecutionProvider_Tensorrt, _In_ OrtSessionOptions* options, int device_id); -ORT_API_STATUS(SessionOptionsAppendExecutionProvider_TensorRT, _In_ OrtSessionOptions* options, int device_id); +ORT_API_STATUS(SessionOptionsAppendExecutionProvider_TensorRT, _In_ OrtSessionOptions* options, const OrtTensorRTProviderOptions* tensorrt_options); #ifdef __cplusplus } From ba8b20eced19729b98e0c1ff6465020059e4aa84 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Sun, 11 Apr 2021 21:27:38 -0700 Subject: [PATCH 13/16] modify trt ep constructor to take additional trt provider options --- .../tensorrt/tensorrt_execution_provider.cc | 47 ++++++++++++++----- .../tensorrt/tensorrt_execution_provider.h | 5 ++ 2 files changed, 39 insertions(+), 13 deletions(-) diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index 27ed2be88115d..b8785a36cd44d 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -394,14 +394,22 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv } // Get environment variables - const std::string max_partition_iterations_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kMaxPartitionIterations); - if (!max_partition_iterations_env.empty()) { - max_partition_iterations_ = std::stoi(max_partition_iterations_env); + if (info.has_trt_options) { + max_partition_iterations_ = info.max_partition_iterations; + } else { + const std::string max_partition_iterations_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kMaxPartitionIterations); + if (!max_partition_iterations_env.empty()) { + max_partition_iterations_ = std::stoi(max_partition_iterations_env); + } } - const std::string min_subgraph_size_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kMinSubgraphSize); - if (!min_subgraph_size_env.empty()) { - min_subgraph_size_ = std::stoi(min_subgraph_size_env); + if (info.has_trt_options) { + min_subgraph_size_ = info.min_subgraph_size; + } else { + const std::string min_subgraph_size_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kMinSubgraphSize); + if (!min_subgraph_size_env.empty()) { + min_subgraph_size_ = std::stoi(min_subgraph_size_env); + } } if (info.has_trt_options) { @@ -451,19 +459,32 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv } } - const std::string dump_subgraphs_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kDumpSubgraphs); - if (!dump_subgraphs_env.empty()) { - dump_subgraphs_ = (std::stoi(dump_subgraphs_env) == 0 ? 
false : true); + if (info.has_trt_options) { + dump_subgraphs_ = info.dump_subgraphs; + } else { + const std::string dump_subgraphs_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kDumpSubgraphs); + if (!dump_subgraphs_env.empty()) { + dump_subgraphs_ = (std::stoi(dump_subgraphs_env) == 0 ? false : true); + } } - const std::string engine_cache_enable_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kEngineCacheEnable); - if (!engine_cache_enable_env.empty()) { - engine_cache_enable_ = (std::stoi(engine_cache_enable_env) == 0 ? false : true); + if (info.has_trt_options) { + engine_cache_enable_ = info.engine_cache_enable; + } else { + const std::string engine_cache_enable_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kEngineCacheEnable); + if (!engine_cache_enable_env.empty()) { + engine_cache_enable_ = (std::stoi(engine_cache_enable_env) == 0 ? false : true); + } } if (engine_cache_enable_ || int8_enable_) { const std::string engine_cache_path = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kEngineCachePath); - cache_path_ = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kCachePath); + if (info.has_trt_options) { + cache_path_ = info.cache_path; + } else { + cache_path_ = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kCachePath); + } + if (!engine_cache_path.empty() && cache_path_.empty()) { cache_path_ = engine_cache_path; LOGS_DEFAULT(WARNING) << "[TensorRT EP] ORT_TENSORRT_ENGINE_CACHE_PATH is deprecated! Please use ORT_TENSORRT_CACHE_PATH to specify engine cache path"; diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h index 29b03954b0c24..16826f81bfba7 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h @@ -77,6 +77,11 @@ struct TensorrtExecutionProviderInfo { bool int8_enable{false}; std::string int8_calibration_table_name{""}; bool int8_use_native_calibration_table{false}; + int max_partition_iterations{ 1000 }; + int min_subgraph_size{ 1 }; + int dump_subgraphs{ 0 }; + int engine_cache_enable{ 0 }; + std::string cache_path{ "" }; }; // Information to construct kernel function state. From 478a81c5e7e86663b5168a3e0b1abe605d7f7721 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Sun, 11 Apr 2021 21:32:36 -0700 Subject: [PATCH 14/16] minor refine --- .../providers/tensorrt/tensorrt_execution_provider.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h index 16826f81bfba7..3a56d3d5e7ff1 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h @@ -77,11 +77,11 @@ struct TensorrtExecutionProviderInfo { bool int8_enable{false}; std::string int8_calibration_table_name{""}; bool int8_use_native_calibration_table{false}; - int max_partition_iterations{ 1000 }; - int min_subgraph_size{ 1 }; - int dump_subgraphs{ 0 }; - int engine_cache_enable{ 0 }; - std::string cache_path{ "" }; + int max_partition_iterations{1000}; + int min_subgraph_size{1}; + int dump_subgraphs{0}; + int engine_cache_enable{0}; + std::string cache_path{""}; }; // Information to construct kernel function state. 
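
The change above is what gives has_trt_options its end-to-end meaning: when it is nonzero the TensorRT EP takes the values carried in the provider-options struct, otherwise it keeps falling back to the ORT_TENSORRT_* environment variables. Below is a minimal C# sketch of the two configuration paths, using only the API added in patches 01 and 09; the option values and the TrtConfigSketch wrapper are illustrative, not part of the patch series (patch 15 later moves the struct and GetDefaultTensorRTProviderOptions into ProviderOptions.cs, but the call pattern is unchanged):

using System;
using Microsoft.ML.OnnxRuntime;

static class TrtConfigSketch
{
    static void Main()
    {
        // Path 1: legacy configuration through ORT_TENSORRT_* environment variables,
        // still honored by the EP as long as has_trt_options == 0.
        Environment.SetEnvironmentVariable("ORT_TENSORRT_ENGINE_CACHE_ENABLE", "1");
        using (var fromEnv = SessionOptions.MakeSessionOptionWithTensorrtProvider(0))
        {
            // new InferenceSession(modelPath, fromEnv) as usual
        }

        // Path 2: explicit provider options; setting has_trt_options = 1 makes the EP use
        // these values instead of the environment variables (values below are illustrative).
        var trtOptions = SessionOptions.GetDefaultTensorRTProviderOptions();
        trtOptions.has_trt_options = 1;
        trtOptions.trt_fp16_enable = 1;
        trtOptions.trt_engine_cache_enable = 1;
        trtOptions.trt_cache_path = "trt_cache";
        using (var fromStruct = SessionOptions.MakeSessionOptionWithTensorrtProvider(trtOptions))
        {
            // new InferenceSession(modelPath, fromStruct) as usual
        }
    }
}
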
From 1cb12c51efa9c349041088478dc4d432532f13c6 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Mon, 12 Apr 2021 02:57:23 -0700 Subject: [PATCH 15/16] refactor --- .../ProviderOptions.cs | 111 +++++++++++++++ .../SessionOptions.cs | 130 +++++++----------- .../InferenceTest.cs | 4 +- 3 files changed, 162 insertions(+), 83 deletions(-) create mode 100644 csharp/src/Microsoft.ML.OnnxRuntime/ProviderOptions.cs diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/ProviderOptions.cs b/csharp/src/Microsoft.ML.OnnxRuntime/ProviderOptions.cs new file mode 100644 index 0000000000000..402fc4ce8ada0 --- /dev/null +++ b/csharp/src/Microsoft.ML.OnnxRuntime/ProviderOptions.cs @@ -0,0 +1,111 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Runtime.InteropServices; + +namespace Microsoft.ML.OnnxRuntime +{ + /// + /// Provider options for TensorRT. + /// + // Example for setting: + // SessionOptions.OrtTensorRTProviderOptions trt_options; + // trt_options.device_id = 0; + // trt_options.has_trt_options = 1; + // trt_options.trt_max_workspace_size = (UIntPtr) (1<<30); + // trt_options.trt_fp16_enable = 1; + // trt_options.trt_int8_enable = 1; + // trt_options.trt_int8_calibration_table_name = "calibration.flatbuffers"; + // trt_options.trt_int8_use_native_calibration_table = 0; + public struct OrtTensorRTProviderOptions + { + public int device_id; //!< cuda device id. Default is 0. + public int has_trt_options; //!< override environment variables with following TensorRT settings at runtime. Default 0 = false, nonzero = true. + public UIntPtr trt_max_workspace_size; //!< maximum workspace size for TensorRT. ORT C++ DLL has this field to be the type of size_t, hence using UIntPtr for conversion. + public int trt_fp16_enable; //!< enable TensorRT FP16 precision. Default 0 = false, nonzero = true. + public int trt_int8_enable; //!< enable TensorRT INT8 precision. Default 0 = false, nonzero = true. + public String trt_int8_calibration_table_name; //!< TensorRT INT8 calibration table name. + public int trt_int8_use_native_calibration_table; //!< use native TensorRT generated calibration table. Default 0 = false, nonzero = true + public int trt_max_partition_iterations; //!< maximum number of iterations allowed in model partitioning for TensorRT. + public int trt_min_subgraph_size; //!< minimum node size in a subgraph after partitioning. + public int trt_dump_subgraphs; //!< dump the subgraphs that are transformed into TRT engines in onnx format to the filesystem. Default 0 = false, nonzero = true + public int trt_engine_cache_enable; //!< enable TensorRT engine caching. Default 0 = false, nonzero = true + public String trt_cache_path; //!< specify path for TensorRT engine and profile files if engine_cache_enable is enabled, or INT8 calibration table file if trt_int8_enable is enabled. + } + + public class ProviderOptions : SafeHandle + { + internal IntPtr Handle + { + get + { + return handle; + } + } + + #region Constructor and Factory methods + + /// + /// Constructs an empty ProviderOptions + /// + public ProviderOptions() + : base(IntPtr.Zero, true) + { + } + + #endregion + + #region Public Methods + + /// + /// Get TensorRT provider options with default setting. + /// + /// TRT provider options instance. 
+ public static OrtTensorRTProviderOptions GetDefaultTensorRTProviderOptions() + { + OrtTensorRTProviderOptions trt_options; + trt_options.device_id = 0; + trt_options.has_trt_options = 0; + trt_options.trt_max_workspace_size = (UIntPtr)(1 << 30); + trt_options.trt_fp16_enable = 0; + trt_options.trt_int8_enable = 0; + trt_options.trt_int8_calibration_table_name = ""; + trt_options.trt_int8_use_native_calibration_table = 0; + trt_options.trt_max_partition_iterations = 1000; + trt_options.trt_min_subgraph_size = 1; + trt_options.trt_dump_subgraphs = 0; + trt_options.trt_engine_cache_enable = 0; + trt_options.trt_cache_path = ""; + + return trt_options; + } + #endregion + + #region Public Properties + + /// + /// Overrides SafeHandle.IsInvalid + /// + /// returns true if handle is equal to Zero + public override bool IsInvalid { get { return handle == IntPtr.Zero; } } + + #endregion + + #region SafeHandle + /// + /// Overrides SafeHandle.ReleaseHandle() to properly dispose of + /// the native instance of SessionOptions + /// + /// always returns true + protected override bool ReleaseHandle() + { + handle = IntPtr.Zero; + return true; + } + + #endregion + } +} \ No newline at end of file diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs index c35aefca8d233..55f2a5d32f8b5 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs @@ -104,36 +104,7 @@ public static SessionOptions MakeSessionOptionWithTensorrtProvider(OrtTensorRTPr SessionOptions options = new SessionOptions(); OrtTensorRTProviderOptionsNative trt_options_native; - trt_options_native.device_id = trt_options.device_id; - trt_options_native.has_user_compute_stream = 0; - trt_options_native.user_compute_stream = IntPtr.Zero; - trt_options_native.has_trt_options = trt_options.has_trt_options; - if ((ulong)trt_options.trt_max_workspace_size > (1 << 30)) - { - trt_options_native.trt_max_workspace_size = (UIntPtr)(1 << 30); - } - else - { - trt_options_native.trt_max_workspace_size = trt_options.trt_max_workspace_size; - } - trt_options_native.trt_fp16_enable = trt_options.trt_fp16_enable; - trt_options_native.trt_int8_enable = trt_options.trt_int8_enable; - var tableNamePinned = GCHandle.Alloc(NativeOnnxValueHelper.StringToZeroTerminatedUtf8(trt_options.trt_int8_calibration_table_name), GCHandleType.Pinned); - using (var pinnedSettingsName = new PinnedGCHandle(tableNamePinned)) - { - trt_options_native.trt_int8_calibration_table_name = pinnedSettingsName.Pointer; - } - trt_options_native.trt_int8_use_native_calibration_table = trt_options.trt_int8_use_native_calibration_table; - trt_options_native.trt_max_partition_iterations = trt_options.trt_max_partition_iterations; - trt_options_native.trt_min_subgraph_size = trt_options.trt_min_subgraph_size; - trt_options_native.trt_dump_subgraphs = trt_options.trt_dump_subgraphs; - trt_options_native.trt_engine_cache_enable = trt_options.trt_engine_cache_enable; - var cachePathPinned = GCHandle.Alloc(NativeOnnxValueHelper.StringToZeroTerminatedUtf8(trt_options.trt_cache_path), GCHandleType.Pinned); - using (var pinnedSettingsName2 = new PinnedGCHandle(cachePathPinned)) - { - trt_options_native.trt_cache_path = pinnedSettingsName2.Pointer; - } - + trt_options_native = PrepareNativeTensorRTProviderOptions(trt_options); NativeApiStatus.VerifySuccess(NativeMethods.SessionOptionsAppendExecutionProvider_TensorRT(options.Handle, ref trt_options_native)); 
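 // TensorRT is registered first so it gets first pick of the graph; the CUDA and CPU
 // providers appended below act as fallbacks for any nodes TensorRT does not take.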
NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionOptionsAppendExecutionProvider_CUDA(options.Handle, trt_options.device_id)); @@ -246,6 +217,18 @@ public void AppendExecutionProvider_Tensorrt(int deviceId) NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionOptionsAppendExecutionProvider_Tensorrt(handle, deviceId)); } + /// + /// Use only if you have the onnxruntime package specific to this Execution Provider. + /// + /// Provider Options for TensorRT EP. + public void AppendExecutionProvider_Tensorrt(OrtTensorRTProviderOptions trt_options) + { + OrtTensorRTProviderOptionsNative trt_options_native; + trt_options_native = PrepareNativeTensorRTProviderOptions(trt_options); + + NativeApiStatus.VerifySuccess(NativeMethods.SessionOptionsAppendExecutionProvider_TensorRT(handle, ref trt_options_native)); + } + /// /// Use only if you have the onnxruntime package specific to this Execution Provider. /// @@ -392,28 +375,6 @@ public void AddFreeDimensionOverrideByName(string dimName, long dimValue) } } - /// - /// Get TensorRT provider options with default setting. - /// - /// TRT provider options instance. - public static OrtTensorRTProviderOptions GetDefaultTensorRTProviderOptions() - { - OrtTensorRTProviderOptions trt_options; - trt_options.device_id = 0; - trt_options.has_trt_options = 0; - trt_options.trt_max_workspace_size = (UIntPtr)(1 << 30); - trt_options.trt_fp16_enable = 0; - trt_options.trt_int8_enable = 0; - trt_options.trt_int8_calibration_table_name = ""; - trt_options.trt_int8_use_native_calibration_table = 0; - trt_options.trt_max_partition_iterations = 1000; - trt_options.trt_min_subgraph_size = 1; - trt_options.trt_dump_subgraphs = 0; - trt_options.trt_engine_cache_enable = 0; - trt_options.trt_cache_path = ""; - - return trt_options; - } #endregion internal IntPtr Handle @@ -681,35 +642,6 @@ public ExecutionMode ExecutionMode } private ExecutionMode _executionMode = ExecutionMode.ORT_SEQUENTIAL; - - /// - /// Provider options for TensorRT. - /// - // Example for setting: - // SessionOptions.OrtTensorRTProviderOptions trt_options; - // trt_options.device_id = 0; - // trt_options.has_trt_options = 1; - // trt_options.trt_max_workspace_size = (UIntPtr) (1<<30); - // trt_options.trt_fp16_enable = 1; - // trt_options.trt_int8_enable = 1; - // trt_options.trt_int8_calibration_table_name = "calibration.flatbuffers"; - // trt_options.trt_int8_use_native_calibration_table = 0; - public struct OrtTensorRTProviderOptions - { - public int device_id; //!< cuda device id. Default is 0. - public int has_trt_options; //!< override environment variables with following TensorRT settings at runtime. Default 0 = false, nonzero = true. - public UIntPtr trt_max_workspace_size; //!< maximum workspace size for TensorRT. ORT C++ DLL has this field to be the type of size_t, hence using UIntPtr for conversion. - public int trt_fp16_enable; //!< enable TensorRT FP16 precision. Default 0 = false, nonzero = true. - public int trt_int8_enable; //!< enable TensorRT INT8 precision. Default 0 = false, nonzero = true. - public String trt_int8_calibration_table_name; //!< TensorRT INT8 calibration table name. - public int trt_int8_use_native_calibration_table; //!< use native TensorRT generated calibration table. Default 0 = false, nonzero = true - public int trt_max_partition_iterations; //!< maximum number of iterations allowed in model partitioning for TensorRT. - public int trt_min_subgraph_size; //!< minimum node size in a subgraph after partitioning. 
- public int trt_dump_subgraphs; //!< dump the subgraphs that are transformed into TRT engines in onnx format to the filesystem. Default 0 = false, nonzero = true - public int trt_engine_cache_enable; //!< enable TensorRT engine caching. Default 0 = false, nonzero = true - public String trt_cache_path; //!< specify path for TensorRT engine and profile files if engine_cache_enable is enabled, or INT8 calibration table file if trt_int8_enable is enabled. - } - #endregion #region Private Methods @@ -763,6 +695,42 @@ private static bool CheckTensorrtExecutionProviderDLLs() return true; } + private static OrtTensorRTProviderOptionsNative PrepareNativeTensorRTProviderOptions(OrtTensorRTProviderOptions trt_options) + { + OrtTensorRTProviderOptionsNative trt_options_native; + trt_options_native.device_id = trt_options.device_id; + trt_options_native.has_user_compute_stream = 0; + trt_options_native.user_compute_stream = IntPtr.Zero; + trt_options_native.has_trt_options = trt_options.has_trt_options; + if ((ulong)trt_options.trt_max_workspace_size > (1 << 30)) + { + trt_options_native.trt_max_workspace_size = (UIntPtr)(1 << 30); + } + else + { + trt_options_native.trt_max_workspace_size = trt_options.trt_max_workspace_size; + } + trt_options_native.trt_fp16_enable = trt_options.trt_fp16_enable; + trt_options_native.trt_int8_enable = trt_options.trt_int8_enable; + var tableNamePinned = GCHandle.Alloc(NativeOnnxValueHelper.StringToZeroTerminatedUtf8(trt_options.trt_int8_calibration_table_name), GCHandleType.Pinned); + using (var pinnedSettingsName = new PinnedGCHandle(tableNamePinned)) + { + trt_options_native.trt_int8_calibration_table_name = pinnedSettingsName.Pointer; + } + trt_options_native.trt_int8_use_native_calibration_table = trt_options.trt_int8_use_native_calibration_table; + trt_options_native.trt_max_partition_iterations = trt_options.trt_max_partition_iterations; + trt_options_native.trt_min_subgraph_size = trt_options.trt_min_subgraph_size; + trt_options_native.trt_dump_subgraphs = trt_options.trt_dump_subgraphs; + trt_options_native.trt_engine_cache_enable = trt_options.trt_engine_cache_enable; + var cachePathPinned = GCHandle.Alloc(NativeOnnxValueHelper.StringToZeroTerminatedUtf8(trt_options.trt_cache_path), GCHandleType.Pinned); + using (var pinnedSettingsName2 = new PinnedGCHandle(cachePathPinned)) + { + trt_options_native.trt_cache_path = pinnedSettingsName2.Pointer; + } + + return trt_options_native; + } + #endregion #region SafeHandle diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs index d0f7a69ed4a3a..404161846a41b 100644 --- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs +++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs @@ -231,13 +231,13 @@ public void CanCreateAndDisposeSessionWithModelPath() #if USE_TENSORRT [Fact] - private void validateTensorRTProviderOptions() + private void TestTensorRTProviderOptions() { string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet.onnx"); string calTablPath = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet_calibration.flatbuffers"); //Environment.SetEnvironmentVariable("ORT_TENSORRT_ENGINE_CACHE_ENABLE", "1"); - SessionOptions.OrtTensorRTProviderOptions trt_options = SessionOptions.GetDefaultTensorRTProviderOptions(); + OrtTensorRTProviderOptions trt_options = ProviderOptions.GetDefaultTensorRTProviderOptions(); trt_options.device_id = 0; trt_options.trt_int8_calibration_table_name = 
calTablPath; trt_options.has_trt_options = 1; From 532c899adb94ed8eb2b3afc058b26b29106cb8ea Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Mon, 12 Apr 2021 03:09:43 -0700 Subject: [PATCH 16/16] add documentation --- csharp/src/Microsoft.ML.OnnxRuntime/ProviderOptions.cs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/ProviderOptions.cs b/csharp/src/Microsoft.ML.OnnxRuntime/ProviderOptions.cs index 402fc4ce8ada0..647e0c92a3cbb 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/ProviderOptions.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/ProviderOptions.cs @@ -36,6 +36,9 @@ public struct OrtTensorRTProviderOptions public String trt_cache_path; //!< specify path for TensorRT engine and profile files if engine_cache_enable is enabled, or INT8 calibration table file if trt_int8_enable is enabled. } + /// + /// Holds provider options configuration for creating an InferenceSession. + /// public class ProviderOptions : SafeHandle { internal IntPtr Handle