From b882a42a9430ea54284c554b6fc165f93401100e Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Tue, 30 Mar 2021 03:06:45 -0700 Subject: [PATCH 01/16] Enable TensorRT EP for C# --- .../Microsoft.ML.OnnxRuntime/NativeMethods.cs | 21 ++++ .../SessionOptions.cs | 104 ++++++++++++++++++ .../tensorrt/tensorrt_provider_factory.h | 2 + .../core/providers/tensorrt/symbols.txt | 1 + 4 files changed, 128 insertions(+) diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs index 0df4c77404898..b83a48a3eefa2 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs @@ -192,6 +192,24 @@ public struct OrtApi public IntPtr ModelMetadataGetGraphDescription; } + #region ORT Provider options + [StructLayout(LayoutKind.Sequential)] + public struct OrtTensorRTProviderOptionsNative + { + public int device_id; // cuda device id. + public int has_user_compute_stream; // indicator of user specified CUDA compute stream. + public IntPtr user_compute_stream; // user specified CUDA compute stream. + public int has_trt_options; // override environment variables with following TensorRT settings at runtime. + public UIntPtr trt_max_workspace_size; // maximum workspace size for TensorRT. + public int trt_fp16_enable; // enable TensorRT FP16 precision. Default 0 = false, nonzero = true + public int trt_int8_enable; // enable TensorRT INT8 precision. Default 0 = false, nonzero = true + public IntPtr trt_int8_calibration_table_name; // TensorRT INT8 calibration table name. + public int trt_int8_use_native_calibration_table; // use native TensorRT generated calibration table. Default 0 = false, nonzero = true + } + #endregion + + + internal static class NativeMethods { private const string nativeLib = "onnxruntime"; @@ -574,6 +592,9 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca [DllImport(nativeLib, CharSet = charSet)] public static extern IntPtr /*(OrtStatus*)*/ OrtSessionOptionsAppendExecutionProvider_Tensorrt(IntPtr /*(OrtSessionOptions*)*/ options, int device_id); + [DllImport(nativeLib, CharSet = charSet)] + public static extern IntPtr /*(OrtStatus*)*/ SessionOptionsAppendExecutionProvider_TensorRT(IntPtr /*(OrtSessionOptions*)*/ options, ref OrtTensorRTProviderOptionsNative trt_options); + [DllImport(nativeLib, CharSet = charSet)] public static extern IntPtr /*(OrtStatus*)*/ OrtSessionOptionsAppendExecutionProvider_MIGraphX(IntPtr /*(OrtSessionOptions*)*/ options, int device_id); diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs index 6bc48a0d704da..2c4cc0ba85f00 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs @@ -38,6 +38,7 @@ public class SessionOptions : SafeHandle { // Delay-loaded CUDA or cuDNN DLLs. Currently, delayload is disabled. See cmake/CMakeLists.txt for more information. private static string[] cudaDelayLoadedLibs = { }; + private static string[] trtDelayLoadedLibs = { }; #region Constructor and Factory methods @@ -75,6 +76,63 @@ public static SessionOptions MakeSessionOptionWithCudaProvider(int deviceId = 0) return options; } + /// + /// A helper method to construct a SessionOptions object for TensorRT execution. + /// Use only if CUDA/TensorRT are installed and you have the onnxruntime package specific to this Execution Provider. 
+ /// + /// + /// A SessionsOptions() object configured for execution on deviceId + public static SessionOptions MakeSessionOptionWithTensorrtProvider(int deviceId = 0) + { + CheckTensorrtExecutionProviderDLLs(); + SessionOptions options = new SessionOptions(); + NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionOptionsAppendExecutionProvider_Tensorrt(options.Handle, deviceId)); + NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionOptionsAppendExecutionProvider_CUDA(options.Handle, deviceId)); + NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionOptionsAppendExecutionProvider_CPU(options.Handle, 1)); + return options; + } + + /// + /// A helper method to construct a SessionOptions object for TensorRT execution. + /// Use only if CUDA/TensorRT are installed and you have the onnxruntime package specific to this Execution Provider. + /// + /// + /// A SessionsOptions() object configured for execution on deviceId + /// + public static SessionOptions MakeSessionOptionWithTensorrtProvider(OrtTensorRTProviderOptions trt_options) + { + CheckTensorrtExecutionProviderDLLs(); + SessionOptions options = new SessionOptions(); + + OrtTensorRTProviderOptionsNative trt_options_native; + trt_options_native.device_id = trt_options.device_id; + trt_options_native.has_user_compute_stream = 0; + trt_options_native.user_compute_stream = IntPtr.Zero; + trt_options_native.has_trt_options = trt_options.has_trt_options; + if ((ulong)trt_options.trt_max_workspace_size > (1 << 30)) + { + trt_options_native.trt_max_workspace_size = (UIntPtr)(1 << 30); + } + else + { + trt_options_native.trt_max_workspace_size = trt_options.trt_max_workspace_size; + } + trt_options_native.trt_fp16_enable = trt_options.trt_fp16_enable; + trt_options_native.trt_int8_enable = trt_options.trt_int8_enable; + var tableNamePinned = GCHandle.Alloc(NativeOnnxValueHelper.StringToZeroTerminatedUtf8(trt_options.trt_int8_calibration_table_name), GCHandleType.Pinned); + using (var pinnedSettingsName = new PinnedGCHandle(tableNamePinned)) + { + trt_options_native.trt_int8_calibration_table_name = pinnedSettingsName.Pointer; + } + trt_options_native.trt_int8_use_native_calibration_table = trt_options.trt_int8_use_native_calibration_table; + + + NativeApiStatus.VerifySuccess(NativeMethods.SessionOptionsAppendExecutionProvider_TensorRT(options.Handle, ref trt_options_native)); + NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionOptionsAppendExecutionProvider_CUDA(options.Handle, trt_options.device_id)); + NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionOptionsAppendExecutionProvider_CPU(options.Handle, 1)); + return options; + } + /// /// A helper method to construct a SessionOptions object for Nuphar execution. /// Use only if you have the onnxruntime package specific to this Execution Provider. @@ -592,6 +650,31 @@ public ExecutionMode ExecutionMode } private ExecutionMode _executionMode = ExecutionMode.ORT_SEQUENTIAL; + + /// + /// Provider options for TensorRT. + /// + /// + // Example for setting: + // SessionOptions.OrtTensorRTProviderOptions trt_options; + // trt_options.device_id = 0; + // trt_options.has_trt_options = 1; + // trt_options.trt_max_workspace_size = (UIntPtr) (1<<30); + // trt_options.trt_fp16_enable = 1; + // trt_options.trt_int8_enable = 1; + // trt_options.trt_int8_calibration_table_name = "C:\calibration.flatbuffers"; + // trt_options.trt_int8_use_native_calibration_table = 0; + public struct OrtTensorRTProviderOptions + { + public int device_id; // cuda device id. Default is 0. 
+ public int has_trt_options; // override environment variables with following TensorRT settings at runtime. Default 0 = false, nonzero = true. + public UIntPtr trt_max_workspace_size; // maximum workspace size for TensorRT. ORT C++ DLL has this field to be the type of size_t, hence using UIntPtr for conversion. + public int trt_fp16_enable; // enable TensorRT FP16 precision. Default 0 = false, nonzero = true. + public int trt_int8_enable; // enable TensorRT INT8 precision. Default 0 = false, nonzero = true. + public String trt_int8_calibration_table_name; // TensorRT INT8 calibration table name. + public int trt_int8_use_native_calibration_table; // use native TensorRT generated calibration table. Default 0 = false, nonzero = true + } + #endregion #region Private Methods @@ -624,6 +707,27 @@ private static bool CheckCudaExecutionProviderDLLs() return true; } + private static bool CheckTensorrtExecutionProviderDLLs() + { + if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) + { + foreach (var dll in trtDelayLoadedLibs) + { + IntPtr handle = LoadLibrary(dll); + if (handle != IntPtr.Zero) + continue; + var sysdir = new StringBuilder(String.Empty, 2048); + GetSystemDirectory(sysdir, (uint)sysdir.Capacity); + throw new OnnxRuntimeException( + ErrorCode.NoSuchFile, + $"kernel32.LoadLibrary():'{dll}' not found. TensorRT/CUDA are required for GPU execution. " + + $". Verify it is available in the system directory={sysdir}. Else copy it to the output folder." + ); + } + } + return true; + } + #endregion #region SafeHandle diff --git a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h index 44debc901cb77..237ff72ab0b30 100644 --- a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h +++ b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h @@ -2,12 +2,14 @@ // Licensed under the MIT License. #include "onnxruntime_c_api.h" +#include "core/session/ort_apis.h" #ifdef __cplusplus extern "C" { #endif ORT_API_STATUS(OrtSessionOptionsAppendExecutionProvider_Tensorrt, _In_ OrtSessionOptions* options, int device_id); +ORT_API_STATUS(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT, _In_ OrtSessionOptions* options, _In_ const OrtTensorRTProviderOptions* tensorrt_options); #ifdef __cplusplus } diff --git a/onnxruntime/core/providers/tensorrt/symbols.txt b/onnxruntime/core/providers/tensorrt/symbols.txt index 47950c476c5e8..5e555e98a06f2 100644 --- a/onnxruntime/core/providers/tensorrt/symbols.txt +++ b/onnxruntime/core/providers/tensorrt/symbols.txt @@ -1 +1,2 @@ OrtSessionOptionsAppendExecutionProvider_Tensorrt +SessionOptionsAppendExecutionProvider_TensorRT \ No newline at end of file From fb79c1d39d8d61eb5ca515e51618742d7cf40eaf Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Tue, 30 Mar 2021 03:18:29 -0700 Subject: [PATCH 02/16] Add comment --- csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs index 2c4cc0ba85f00..c143c2fa604b5 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs @@ -96,7 +96,7 @@ public static SessionOptions MakeSessionOptionWithTensorrtProvider(int deviceId /// A helper method to construct a SessionOptions object for TensorRT execution. 
/// Use only if CUDA/TensorRT are installed and you have the onnxruntime package specific to this Execution Provider. /// - /// + /// Provider Options for TensorRT EP. /// A SessionsOptions() object configured for execution on deviceId /// public static SessionOptions MakeSessionOptionWithTensorrtProvider(OrtTensorRTProviderOptions trt_options) From 5328c03f893527faf40b17f5136561d24b816dc1 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Tue, 30 Mar 2021 06:03:49 -0700 Subject: [PATCH 03/16] Fix bug due to build fail --- .../core/providers/tensorrt/tensorrt_provider_factory.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h index 237ff72ab0b30..e587a74f35d61 100644 --- a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h +++ b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h @@ -9,7 +9,6 @@ extern "C" { #endif ORT_API_STATUS(OrtSessionOptionsAppendExecutionProvider_Tensorrt, _In_ OrtSessionOptions* options, int device_id); -ORT_API_STATUS(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT, _In_ OrtSessionOptions* options, _In_ const OrtTensorRTProviderOptions* tensorrt_options); #ifdef __cplusplus } From 5e6e2333e136f6ad179110573bc968d5a69c55d0 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Tue, 30 Mar 2021 06:08:40 -0700 Subject: [PATCH 04/16] Remove unnecessary code --- .../core/providers/tensorrt/tensorrt_provider_factory.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h index e587a74f35d61..44debc901cb77 100644 --- a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h +++ b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h @@ -2,7 +2,6 @@ // Licensed under the MIT License. #include "onnxruntime_c_api.h" -#include "core/session/ort_apis.h" #ifdef __cplusplus extern "C" { From 2074557acf75f0ff5962e2d5fd4ab8ca569bb32e Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Wed, 31 Mar 2021 21:51:55 -0700 Subject: [PATCH 05/16] Fix bug for documentation check --- .../Microsoft.ML.OnnxRuntime/SessionOptions.cs | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs index c143c2fa604b5..9da70ed4cd737 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs @@ -98,7 +98,6 @@ public static SessionOptions MakeSessionOptionWithTensorrtProvider(int deviceId /// /// Provider Options for TensorRT EP. /// A SessionsOptions() object configured for execution on deviceId - /// public static SessionOptions MakeSessionOptionWithTensorrtProvider(OrtTensorRTProviderOptions trt_options) { CheckTensorrtExecutionProviderDLLs(); @@ -654,7 +653,6 @@ public ExecutionMode ExecutionMode /// /// Provider options for TensorRT. 
/// - /// // Example for setting: // SessionOptions.OrtTensorRTProviderOptions trt_options; // trt_options.device_id = 0; @@ -664,15 +662,16 @@ public ExecutionMode ExecutionMode // trt_options.trt_int8_enable = 1; // trt_options.trt_int8_calibration_table_name = "C:\calibration.flatbuffers"; // trt_options.trt_int8_use_native_calibration_table = 0; + public struct OrtTensorRTProviderOptions { - public int device_id; // cuda device id. Default is 0. - public int has_trt_options; // override environment variables with following TensorRT settings at runtime. Default 0 = false, nonzero = true. - public UIntPtr trt_max_workspace_size; // maximum workspace size for TensorRT. ORT C++ DLL has this field to be the type of size_t, hence using UIntPtr for conversion. - public int trt_fp16_enable; // enable TensorRT FP16 precision. Default 0 = false, nonzero = true. - public int trt_int8_enable; // enable TensorRT INT8 precision. Default 0 = false, nonzero = true. - public String trt_int8_calibration_table_name; // TensorRT INT8 calibration table name. - public int trt_int8_use_native_calibration_table; // use native TensorRT generated calibration table. Default 0 = false, nonzero = true + public int device_id; //!< cuda device id. Default is 0. + public int has_trt_options; //!< override environment variables with following TensorRT settings at runtime. Default 0 = false, nonzero = true. + public UIntPtr trt_max_workspace_size; //!< maximum workspace size for TensorRT. ORT C++ DLL has this field to be the type of size_t, hence using UIntPtr for conversion. + public int trt_fp16_enable; //!< enable TensorRT FP16 precision. Default 0 = false, nonzero = true. + public int trt_int8_enable; //!< enable TensorRT INT8 precision. Default 0 = false, nonzero = true. + public String trt_int8_calibration_table_name; //!< TensorRT INT8 calibration table name. + public int trt_int8_use_native_calibration_table; //!< use native TensorRT generated calibration table. 
Default 0 = false, nonzero = true } #endregion From f016bc451214d8ce9d533a9615c8805aa4f1f531 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Thu, 1 Apr 2021 08:22:54 -0700 Subject: [PATCH 06/16] Add test cases --- .../InferenceTest.cs | 51 +++++++++++++++++- .../squeezenet_calibration.flatbuffers | Bin 0 -> 4108 bytes 2 files changed, 49 insertions(+), 2 deletions(-) create mode 100644 csharp/testdata/squeezenet_calibration.flatbuffers diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs index ad401aba1d6ac..83260691c898f 100644 --- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs +++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs @@ -227,6 +227,52 @@ public void CanCreateAndDisposeSessionWithModelPath() } } + [Fact] + private void validateProviderOptions() + { + string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet.onnx"); + +#if USE_TENSORRT + string calTablPath = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet_calibration.flatbuffers"); + //Environment.SetEnvironmentVariable("ORT_TENSORRT_ENGINE_CACHE_ENABLE", "1"); + + SessionOptions.OrtTensorRTProviderOptions trt_options; + trt_options.device_id = 0; + trt_options.trt_int8_calibration_table_name = calTablPath; + trt_options.has_trt_options = 1; + trt_options.trt_max_workspace_size = (UIntPtr)(1 << 30); + trt_options.trt_fp16_enable = 1; + trt_options.trt_int8_enable = 1; + trt_options.trt_int8_use_native_calibration_table = 0; + + var session = new InferenceSession(modelPath, SessionOptions.MakeSessionOptionWithTensorrtProvider(trt_options)); + var inputMeta = session.InputMetadata; + var container = new List(); + float[] inputData = LoadTensorFromFile(@"bench.in"); // this is the data for only one input tensor for this model + foreach (var name in inputMeta.Keys) + { + Assert.Equal(typeof(float), inputMeta[name].ElementType); + Assert.True(inputMeta[name].IsTensor); + var tensor = new DenseTensor(inputData, inputMeta[name].Dimensions); + container.Add(NamedOnnxValue.CreateFromTensor(name, tensor)); + } + + using (var results = session.Run(container)) + { + // Following code is temporarily commented. + // Even though we enable fp16 or int8 through provider options, it could be disabled from TRT EP due to GPU not supporting fp16 or int8. + // Once From/ToProviderOptions() has been implemented in TRT EP, better test cases will be added. 
+ /* + string[] files = Directory.GetFiles(Directory.GetCurrentDirectory(), "*int8*.engine"); + Assert.True(files.Any()); + files = Directory.GetFiles(Directory.GetCurrentDirectory(), "*fp16*.engine"); + Assert.True(files.Any()); + */ + } +#endif + + } + [Theory] [InlineData(GraphOptimizationLevel.ORT_DISABLE_ALL, true)] [InlineData(GraphOptimizationLevel.ORT_DISABLE_ALL, false)] @@ -2349,6 +2395,7 @@ private void VerifyNativeMethodsExist() #endif #if USE_TENSORRT ,"OrtSessionOptionsAppendExecutionProvider_Tensorrt" + ,"SessionOptionsAppendExecutionProvider_TensorRT" #endif #if USE_MIGRAPHX ,"OrtSessionOptionsAppendExecutionProvider_MIGraphX" @@ -2727,7 +2774,7 @@ internal class DisposableListTest : List, IDisposableReadOnlyCollection public DisposableListTest() { } public DisposableListTest(int count) : base(count) { } - #region IDisposable Support +#region IDisposable Support private bool disposedValue = false; // To detect redundant calls protected virtual void Dispose(bool disposing) @@ -2760,6 +2807,6 @@ public void Dispose() Dispose(true); GC.SuppressFinalize(this); } - #endregion +#endregion } } diff --git a/csharp/testdata/squeezenet_calibration.flatbuffers b/csharp/testdata/squeezenet_calibration.flatbuffers new file mode 100644 index 0000000000000000000000000000000000000000..e5cad768f4fe100a2c431aa8251ea85d085951c1 GIT binary patch literal 4108 zcmbVPJy4xR6kQb+6&-X^q@YNV!h%`$?(W^6j&UqfSWu*($P8fuPMl!^Dmsiqk%E#k zg#|^56e&`qNMS)?ks<{pMT!&@DN>|SQBhGm_l4!{+Yd&356;K%y*+z>&hEXtV@f`6 zsZlk8r@~XIPk_nBw zHfFM-Q%D|HRH1MC2BN+ntXiW8e_*?<=(g+ykBg;~!uyU>4W_wt<b@%Ih|w3F*=`|jlqE+{)#_jK)~n1&x0|zq~SUnIF>2oY%xaW`uvNp7W35C zUw*T=_}wC{(Hy>*8I}vnn+?}SO~x4F;Em-JGi*vuU)fz;TbaML;MSa&RZkkO2R9ie zhSSc3;LqSnz$3r(tHQ&2$0N>3&>Gx_c&ZN-EggeATdI4YqRxm;dHLpU9Y$@ z|7r1JNgc;~WBJyN`Be$_(J(w1eA)1lrksPz&KegTJ6UTIzvMSHx_-Rh&yo0gZZ7OD zR9uwXRqS+eD)eOKN^>?14Bw4;+8zsT*=eKK|fv@<2Cw= zg)%n6wl}BQE&9wh+@Q&4CfIcWtBa*iwRVLocJDOY)8Z}qgi4>C4lgJ2o+Da-lVXFp zi$2`wGy2D1tVy%+*`fsyQ(~@b;{X>9f7;=8dwz+P8+~p!T(q8$EPjtmvaRw|b0tsP z4cBYNZjBj>0*xn<)Qi?`(dVGUP1rs$;oN~OG`OaU-J*~Gv&~gE-l7jz`Yctv-daSh z@X=r$TF%65u_%f@zR~CYuv{eSd6ydqrzMX)5Nu8fi7I*WjXszD>c`sinQ#_D!;H+v z;7n(${Zn&ApKBFY=4b6vOx6*IoLsK!YT*`rz8{h+c6Tf8ah^Nz7JYoB&)8u<)>r-} z)N+>@rx?d6$@#13;~IUg49kVX8(nVa3@rXg4{?n?KUUmvjLF)e%H(7Mey|nr>O|X` zD_ Date: Thu, 1 Apr 2021 08:27:18 -0700 Subject: [PATCH 07/16] restore some changes --- csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs index 83260691c898f..21eb28dc6b6b2 100644 --- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs +++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs @@ -2774,7 +2774,7 @@ internal class DisposableListTest : List, IDisposableReadOnlyCollection public DisposableListTest() { } public DisposableListTest(int count) : base(count) { } -#region IDisposable Support + #region IDisposable Support private bool disposedValue = false; // To detect redundant calls protected virtual void Dispose(bool disposing) @@ -2807,6 +2807,6 @@ public void Dispose() Dispose(true); GC.SuppressFinalize(this); } -#endregion + #endregion } } From 767083f1468993923b222a86b67d7a03f8d6b27a Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Mon, 5 Apr 2021 01:08:15 -0700 Subject: [PATCH 08/16] fix CI build bug --- .../core/providers/tensorrt/tensorrt_provider_factory.h | 1 + 1 file changed, 1 insertion(+) diff --git 
a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h index 44debc901cb77..e8d6aff9feb9d 100644 --- a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h +++ b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h @@ -8,6 +8,7 @@ extern "C" { #endif ORT_API_STATUS(OrtSessionOptionsAppendExecutionProvider_Tensorrt, _In_ OrtSessionOptions* options, int device_id); +ORT_API_STATUS(SessionOptionsAppendExecutionProvider_TensorRT, _In_ OrtSessionOptions* options, int device_id); #ifdef __cplusplus } From 7f3a544ac4edbbdbbc619376c258cc37b9390797 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Thu, 8 Apr 2021 20:25:15 -0700 Subject: [PATCH 09/16] expose all tensorrt env provider options --- .../Microsoft.ML.OnnxRuntime/NativeMethods.cs | 5 +++ .../SessionOptions.cs | 40 ++++++++++++++++++- .../InferenceTest.cs | 12 +++--- .../core/session/onnxruntime_c_api.h | 31 ++++++++------ .../python/onnxruntime_pybind_state.cc | 2 +- onnxruntime/test/onnx/main.cc | 7 +++- onnxruntime/test/perftest/ort_test_session.cc | 10 +++++ onnxruntime/test/util/default_providers.cc | 2 +- 8 files changed, 86 insertions(+), 23 deletions(-) diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs index b83a48a3eefa2..815c365d321a2 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs @@ -205,6 +205,11 @@ public struct OrtTensorRTProviderOptionsNative public int trt_int8_enable; // enable TensorRT INT8 precision. Default 0 = false, nonzero = true public IntPtr trt_int8_calibration_table_name; // TensorRT INT8 calibration table name. public int trt_int8_use_native_calibration_table; // use native TensorRT generated calibration table. Default 0 = false, nonzero = true + public int trt_max_partition_iterations; // maximum number of iterations allowed in model partitioning for TensorRT. + public int trt_min_subgraph_size; // minimum node size in a subgraph after partitioning. + public int trt_dump_subgraphs; // dump the subgraphs that are transformed into TRT engines in onnx format to the filesystem. Default 0 = false, nonzero = true + public int trt_engine_cache_enable; // enable TensorRT engine caching. Default 0 = false, nonzero = true + public IntPtr trt_cache_path; // specify path for TensorRT engine and profile files if engine_cache_enable is enabled, or INT8 calibration table file if trt_int8_enable is enabled. 
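 // Note: field order and types must stay in sync with OrtTensorRTProviderOptions in onnxruntime_c_api.h (the struct is marshaled with LayoutKind.Sequential).
 // The IntPtr string fields are filled from pinned, zero-terminated UTF-8 buffers (see MakeSessionOptionWithTensorrtProvider in SessionOptions.cs), and UIntPtr stands in for the native size_t.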
} #endregion diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs index 9da70ed4cd737..c35aefca8d233 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs @@ -124,6 +124,15 @@ public static SessionOptions MakeSessionOptionWithTensorrtProvider(OrtTensorRTPr trt_options_native.trt_int8_calibration_table_name = pinnedSettingsName.Pointer; } trt_options_native.trt_int8_use_native_calibration_table = trt_options.trt_int8_use_native_calibration_table; + trt_options_native.trt_max_partition_iterations = trt_options.trt_max_partition_iterations; + trt_options_native.trt_min_subgraph_size = trt_options.trt_min_subgraph_size; + trt_options_native.trt_dump_subgraphs = trt_options.trt_dump_subgraphs; + trt_options_native.trt_engine_cache_enable = trt_options.trt_engine_cache_enable; + var cachePathPinned = GCHandle.Alloc(NativeOnnxValueHelper.StringToZeroTerminatedUtf8(trt_options.trt_cache_path), GCHandleType.Pinned); + using (var pinnedSettingsName2 = new PinnedGCHandle(cachePathPinned)) + { + trt_options_native.trt_cache_path = pinnedSettingsName2.Pointer; + } NativeApiStatus.VerifySuccess(NativeMethods.SessionOptionsAppendExecutionProvider_TensorRT(options.Handle, ref trt_options_native)); @@ -382,6 +391,29 @@ public void AddFreeDimensionOverrideByName(string dimName, long dimValue) NativeApiStatus.VerifySuccess(NativeMethods.OrtAddFreeDimensionOverrideByName(handle, pinnedDimName.Pointer, dimValue)); } } + + /// + /// Get TensorRT provider options with default setting. + /// + /// TRT provider options instance. + public static OrtTensorRTProviderOptions GetDefaultTensorRTProviderOptions() + { + OrtTensorRTProviderOptions trt_options; + trt_options.device_id = 0; + trt_options.has_trt_options = 0; + trt_options.trt_max_workspace_size = (UIntPtr)(1 << 30); + trt_options.trt_fp16_enable = 0; + trt_options.trt_int8_enable = 0; + trt_options.trt_int8_calibration_table_name = ""; + trt_options.trt_int8_use_native_calibration_table = 0; + trt_options.trt_max_partition_iterations = 1000; + trt_options.trt_min_subgraph_size = 1; + trt_options.trt_dump_subgraphs = 0; + trt_options.trt_engine_cache_enable = 0; + trt_options.trt_cache_path = ""; + + return trt_options; + } #endregion internal IntPtr Handle @@ -660,9 +692,8 @@ public ExecutionMode ExecutionMode // trt_options.trt_max_workspace_size = (UIntPtr) (1<<30); // trt_options.trt_fp16_enable = 1; // trt_options.trt_int8_enable = 1; - // trt_options.trt_int8_calibration_table_name = "C:\calibration.flatbuffers"; + // trt_options.trt_int8_calibration_table_name = "calibration.flatbuffers"; // trt_options.trt_int8_use_native_calibration_table = 0; - public struct OrtTensorRTProviderOptions { public int device_id; //!< cuda device id. Default is 0. @@ -672,6 +703,11 @@ public struct OrtTensorRTProviderOptions public int trt_int8_enable; //!< enable TensorRT INT8 precision. Default 0 = false, nonzero = true. public String trt_int8_calibration_table_name; //!< TensorRT INT8 calibration table name. public int trt_int8_use_native_calibration_table; //!< use native TensorRT generated calibration table. Default 0 = false, nonzero = true + public int trt_max_partition_iterations; //!< maximum number of iterations allowed in model partitioning for TensorRT. + public int trt_min_subgraph_size; //!< minimum node size in a subgraph after partitioning. 
+ public int trt_dump_subgraphs; //!< dump the subgraphs that are transformed into TRT engines in onnx format to the filesystem. Default 0 = false, nonzero = true + public int trt_engine_cache_enable; //!< enable TensorRT engine caching. Default 0 = false, nonzero = true + public String trt_cache_path; //!< specify path for TensorRT engine and profile files if engine_cache_enable is enabled, or INT8 calibration table file if trt_int8_enable is enabled. } #endregion diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs index 21eb28dc6b6b2..bae0239765c8b 100644 --- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs +++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs @@ -227,16 +227,17 @@ public void CanCreateAndDisposeSessionWithModelPath() } } + + +#if USE_TENSORRT [Fact] - private void validateProviderOptions() + private void validateTensorRTProviderOptions() { string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet.onnx"); - -#if USE_TENSORRT string calTablPath = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet_calibration.flatbuffers"); //Environment.SetEnvironmentVariable("ORT_TENSORRT_ENGINE_CACHE_ENABLE", "1"); - SessionOptions.OrtTensorRTProviderOptions trt_options; + SessionOptions.OrtTensorRTProviderOptions trt_options = SessionOptions.GetDefaultTensorRTProviderOptions(); trt_options.device_id = 0; trt_options.trt_int8_calibration_table_name = calTablPath; trt_options.has_trt_options = 1; @@ -257,6 +258,7 @@ private void validateProviderOptions() container.Add(NamedOnnxValue.CreateFromTensor(name, tensor)); } + using (var results = session.Run(container)) { // Following code is temporarily commented. @@ -269,9 +271,9 @@ private void validateProviderOptions() Assert.True(files.Any()); */ } + } #endif - } [Theory] [InlineData(GraphOptimizationLevel.ORT_DISABLE_ALL, true)] diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index df0b1c221a1f3..db32dfb409b52 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -279,25 +279,30 @@ typedef struct OrtCUDAProviderOptions { /// Options for the ROCM provider that are passed to SessionOptionsAppendExecutionProvider_ROCM /// typedef struct OrtROCMProviderOptions { - int device_id; // hip device with id=0 as default device. - int miopen_conv_exhaustive_search; // miopen conv algo exhaustive search option - size_t hip_mem_limit; // default hip memory limitation to maximum finite value of size_t. - int arena_extend_strategy; // default area extend strategy to KNextPowerOfTwo. + int device_id; // hip device with id=0 as default device. + int miopen_conv_exhaustive_search; // miopen conv algo exhaustive search option + size_t hip_mem_limit; // default hip memory limitation to maximum finite value of size_t. + int arena_extend_strategy; // default area extend strategy to KNextPowerOfTwo. } OrtROCMProviderOptions; /// /// Options for the TensorRT provider that are passed to SessionOptionsAppendExecutionProvider_TensorRT /// typedef struct OrtTensorRTProviderOptions { - int device_id; // cuda device id. - int has_user_compute_stream; // indicator of user specified CUDA compute stream. - void* user_compute_stream; // user specified CUDA compute stream. - int has_trt_options; // override environment variables with following TensorRT settings at runtime. 
- size_t trt_max_workspace_size; // maximum workspace size for TensorRT. - int trt_fp16_enable; // enable TensorRT FP16 precision. Default 0 = false, nonzero = true - int trt_int8_enable; // enable TensorRT INT8 precision. Default 0 = false, nonzero = true - const char* trt_int8_calibration_table_name; // TensorRT INT8 calibration table name. - int trt_int8_use_native_calibration_table; // use native TensorRT generated calibration table. Default 0 = false, nonzero = true + int device_id; // cuda device id. + int has_user_compute_stream; // indicator of user specified CUDA compute stream. + void* user_compute_stream; // user specified CUDA compute stream. + int has_trt_options; // override environment variables with following TensorRT settings at runtime. + size_t trt_max_workspace_size; // maximum workspace size for TensorRT. + int trt_fp16_enable; // enable TensorRT FP16 precision. Default 0 = false, nonzero = true + int trt_int8_enable; // enable TensorRT INT8 precision. Default 0 = false, nonzero = true + const char* trt_int8_calibration_table_name; // TensorRT INT8 calibration table name. + int trt_int8_use_native_calibration_table; // use native TensorRT generated calibration table. Default 0 = false, nonzero = true + int max_partition_iterations; // maximum number of iterations allowed in model partitioning for TensorRT. + int min_subgraph_size; // minimum node size in a subgraph after partitioning. + int dump_subgraphs; // dump the subgraphs that are transformed into TRT engines in onnx format to the filesystem. Default 0 = false, nonzero = true + int engine_cache_enable; // enable TensorRT engine caching. Default 0 = false, nonzero = true + const char* cache_path; // specify path for TensorRT engine and profile files if engine_cache_enable is enabled, or INT8 calibration table file if trt_int8_enable is enabled. 
} OrtTensorRTProviderOptions; /// diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index 5a26cfd4de673..ba128571c717a 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -494,7 +494,7 @@ static void RegisterExecutionProviders(InferenceSession* sess, const std::vector sess->GetSessionOptions().enable_cpu_mem_arena)); } else if (type == kTensorrtExecutionProvider) { #ifdef USE_TENSORRT - OrtTensorRTProviderOptions params{0, 0, nullptr, 0, 1 << 30, 0, 0, nullptr, 0}; + OrtTensorRTProviderOptions params{0, 0, nullptr, 0, 1 << 30, 0, 0, nullptr, 0, 1000, 1, 0, 0, nullptr}; std::string trt_int8_calibration_table_name; auto it = provider_options_map.find(type); if (it != provider_options_map.end()) { diff --git a/onnxruntime/test/onnx/main.cc b/onnxruntime/test/onnx/main.cc index dd799ac65570c..e71a72459ed78 100644 --- a/onnxruntime/test/onnx/main.cc +++ b/onnxruntime/test/onnx/main.cc @@ -318,7 +318,12 @@ int real_main(int argc, char* argv[], Ort::Env& env) { 0, 0, nullptr, - 0}; ++ 0, ++ 1000, ++ 1, ++ 0, ++ 0, ++ nullptr}; OrtCUDAProviderOptions cuda_options{ 0, diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index 7e95baf8d1e3b..2a7f29bc85583 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -68,6 +68,11 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device bool trt_int8_enable = false; std::string trt_int8_calibration_table_name = ""; bool trt_int8_use_native_calibration_table = false; + int trt_max_partition_iterations = 1000; + int trt_min_subgraph_size = 1; + bool trt_dump_subgraphs = false; + bool trt_engine_cache_enable = false; + std::string trt_cache_path = ""; #ifdef _MSC_VER std::string ov_string = ToMBString(performance_test_config.run_config.ep_runtime_config_string); @@ -145,6 +150,11 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device tensorrt_options.trt_int8_enable = trt_int8_enable; tensorrt_options.trt_int8_calibration_table_name = trt_int8_calibration_table_name.c_str(); tensorrt_options.trt_int8_use_native_calibration_table = trt_int8_use_native_calibration_table; + tensorrt_options.trt_max_partition_iterations = trt_max_partition_iterations; + tensorrt_options.trt_min_subgraph_size = trt_min_subgraph_size; + tensorrt_options.trt_dump_subgraphs = trt_dump_subgraphs; + tensorrt_options.trt_engine_cache_enable = trt_engine_cache_enable; + tensorrt_options.trt_cache_path = trt_cache_path.c_str(); session_options.AppendExecutionProvider_TensorRT(tensorrt_options); OrtCUDAProviderOptions cuda_options{ diff --git a/onnxruntime/test/util/default_providers.cc b/onnxruntime/test/util/default_providers.cc index 7cb8e4c216f90..7e455bd0f4d46 100644 --- a/onnxruntime/test/util/default_providers.cc +++ b/onnxruntime/test/util/default_providers.cc @@ -43,7 +43,7 @@ std::unique_ptr DefaultCpuExecutionProvider(bool enable_aren std::unique_ptr DefaultTensorrtExecutionProvider() { #ifdef USE_TENSORRT - OrtTensorRTProviderOptions params{0, 0, nullptr, 0, 1 << 30, 0, 0, nullptr, 0}; + OrtTensorRTProviderOptions params{0, 0, nullptr, 0, 1 << 30, 0, 0, nullptr, 0, 1000, 1, 0, 0, nullptr}; if (auto factory = CreateExecutionProviderFactory_Tensorrt(¶ms)) return factory->CreateProvider(); #endif From 9af54d0524fddf65008886f8ff461f9136ddd9df Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Thu, 8 Apr 
2021 21:05:35 -0700 Subject: [PATCH 10/16] fix typos --- onnxruntime/test/onnx/main.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/onnxruntime/test/onnx/main.cc b/onnxruntime/test/onnx/main.cc index e71a72459ed78..ee29c8d6f728c 100644 --- a/onnxruntime/test/onnx/main.cc +++ b/onnxruntime/test/onnx/main.cc @@ -318,12 +318,12 @@ int real_main(int argc, char* argv[], Ort::Env& env) { 0, 0, nullptr, -+ 0, -+ 1000, -+ 1, -+ 0, -+ 0, -+ nullptr}; + 0, + 1000, + 1, + 0, + 0, + nullptr}; OrtCUDAProviderOptions cuda_options{ 0, From ee998f86b73fd864a258f93a2f896b61df4723f6 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Thu, 8 Apr 2021 21:48:58 -0700 Subject: [PATCH 11/16] Fix bug --- include/onnxruntime/core/session/onnxruntime_c_api.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index e152de59c8ae6..7d892aacd8b4b 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -298,11 +298,11 @@ typedef struct OrtTensorRTProviderOptions { int trt_int8_enable; // enable TensorRT INT8 precision. Default 0 = false, nonzero = true const char* trt_int8_calibration_table_name; // TensorRT INT8 calibration table name. int trt_int8_use_native_calibration_table; // use native TensorRT generated calibration table. Default 0 = false, nonzero = true - int max_partition_iterations; // maximum number of iterations allowed in model partitioning for TensorRT. - int min_subgraph_size; // minimum node size in a subgraph after partitioning. - int dump_subgraphs; // dump the subgraphs that are transformed into TRT engines in onnx format to the filesystem. Default 0 = false, nonzero = true - int engine_cache_enable; // enable TensorRT engine caching. Default 0 = false, nonzero = true - const char* cache_path; // specify path for TensorRT engine and profile files if engine_cache_enable is enabled, or INT8 calibration table file if trt_int8_enable is enabled. + int trt_max_partition_iterations; // maximum number of iterations allowed in model partitioning for TensorRT. + int trt_min_subgraph_size; // minimum node size in a subgraph after partitioning. + int trt_dump_subgraphs; // dump the subgraphs that are transformed into TRT engines in onnx format to the filesystem. Default 0 = false, nonzero = true + int trt_engine_cache_enable; // enable TensorRT engine caching. Default 0 = false, nonzero = true + const char* trt_cache_path; // specify path for TensorRT engine and profile files if engine_cache_enable is enabled, or INT8 calibration table file if trt_int8_enable is enabled. 
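  // Note: keep the field order and types here in sync with the managed OrtTensorRTProviderOptionsNative mirror in csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs, which marshals this struct with a sequential layout.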
} OrtTensorRTProviderOptions; /// From 58f2b2f65cbe99f42a08c25dc16993fe2a2dd468 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 9 Apr 2021 01:10:45 -0700 Subject: [PATCH 12/16] fix minor define issue --- .../core/providers/tensorrt/tensorrt_provider_factory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h index e8d6aff9feb9d..9aa5f37ad7010 100644 --- a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h +++ b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h @@ -8,7 +8,7 @@ extern "C" { #endif ORT_API_STATUS(OrtSessionOptionsAppendExecutionProvider_Tensorrt, _In_ OrtSessionOptions* options, int device_id); -ORT_API_STATUS(SessionOptionsAppendExecutionProvider_TensorRT, _In_ OrtSessionOptions* options, int device_id); +ORT_API_STATUS(SessionOptionsAppendExecutionProvider_TensorRT, _In_ OrtSessionOptions* options, const OrtTensorRTProviderOptions* tensorrt_options); #ifdef __cplusplus } From ba8b20eced19729b98e0c1ff6465020059e4aa84 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Sun, 11 Apr 2021 21:27:38 -0700 Subject: [PATCH 13/16] modify trt ep constructor to take additional trt provider options --- .../tensorrt/tensorrt_execution_provider.cc | 47 ++++++++++++++----- .../tensorrt/tensorrt_execution_provider.h | 5 ++ 2 files changed, 39 insertions(+), 13 deletions(-) diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index 27ed2be88115d..b8785a36cd44d 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -394,14 +394,22 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv } // Get environment variables - const std::string max_partition_iterations_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kMaxPartitionIterations); - if (!max_partition_iterations_env.empty()) { - max_partition_iterations_ = std::stoi(max_partition_iterations_env); + if (info.has_trt_options) { + max_partition_iterations_ = info.max_partition_iterations; + } else { + const std::string max_partition_iterations_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kMaxPartitionIterations); + if (!max_partition_iterations_env.empty()) { + max_partition_iterations_ = std::stoi(max_partition_iterations_env); + } } - const std::string min_subgraph_size_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kMinSubgraphSize); - if (!min_subgraph_size_env.empty()) { - min_subgraph_size_ = std::stoi(min_subgraph_size_env); + if (info.has_trt_options) { + min_subgraph_size_ = info.min_subgraph_size; + } else { + const std::string min_subgraph_size_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kMinSubgraphSize); + if (!min_subgraph_size_env.empty()) { + min_subgraph_size_ = std::stoi(min_subgraph_size_env); + } } if (info.has_trt_options) { @@ -451,19 +459,32 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv } } - const std::string dump_subgraphs_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kDumpSubgraphs); - if (!dump_subgraphs_env.empty()) { - dump_subgraphs_ = (std::stoi(dump_subgraphs_env) == 0 ? 
false : true); + if (info.has_trt_options) { + dump_subgraphs_ = info.dump_subgraphs; + } else { + const std::string dump_subgraphs_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kDumpSubgraphs); + if (!dump_subgraphs_env.empty()) { + dump_subgraphs_ = (std::stoi(dump_subgraphs_env) == 0 ? false : true); + } } - const std::string engine_cache_enable_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kEngineCacheEnable); - if (!engine_cache_enable_env.empty()) { - engine_cache_enable_ = (std::stoi(engine_cache_enable_env) == 0 ? false : true); + if (info.has_trt_options) { + engine_cache_enable_ = info.engine_cache_enable; + } else { + const std::string engine_cache_enable_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kEngineCacheEnable); + if (!engine_cache_enable_env.empty()) { + engine_cache_enable_ = (std::stoi(engine_cache_enable_env) == 0 ? false : true); + } } if (engine_cache_enable_ || int8_enable_) { const std::string engine_cache_path = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kEngineCachePath); - cache_path_ = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kCachePath); + if (info.has_trt_options) { + cache_path_ = info.cache_path; + } else { + cache_path_ = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kCachePath); + } + if (!engine_cache_path.empty() && cache_path_.empty()) { cache_path_ = engine_cache_path; LOGS_DEFAULT(WARNING) << "[TensorRT EP] ORT_TENSORRT_ENGINE_CACHE_PATH is deprecated! Please use ORT_TENSORRT_CACHE_PATH to specify engine cache path"; diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h index 29b03954b0c24..16826f81bfba7 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h @@ -77,6 +77,11 @@ struct TensorrtExecutionProviderInfo { bool int8_enable{false}; std::string int8_calibration_table_name{""}; bool int8_use_native_calibration_table{false}; + int max_partition_iterations{ 1000 }; + int min_subgraph_size{ 1 }; + int dump_subgraphs{ 0 }; + int engine_cache_enable{ 0 }; + std::string cache_path{ "" }; }; // Information to construct kernel function state. From 478a81c5e7e86663b5168a3e0b1abe605d7f7721 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Sun, 11 Apr 2021 21:32:36 -0700 Subject: [PATCH 14/16] minor refine --- .../providers/tensorrt/tensorrt_execution_provider.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h index 16826f81bfba7..3a56d3d5e7ff1 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h @@ -77,11 +77,11 @@ struct TensorrtExecutionProviderInfo { bool int8_enable{false}; std::string int8_calibration_table_name{""}; bool int8_use_native_calibration_table{false}; - int max_partition_iterations{ 1000 }; - int min_subgraph_size{ 1 }; - int dump_subgraphs{ 0 }; - int engine_cache_enable{ 0 }; - std::string cache_path{ "" }; + int max_partition_iterations{1000}; + int min_subgraph_size{1}; + int dump_subgraphs{0}; + int engine_cache_enable{0}; + std::string cache_path{""}; }; // Information to construct kernel function state. 
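
The change above is what gives has_trt_options its end-to-end meaning: when it is nonzero the TensorRT EP takes the values carried in the provider-options struct, otherwise it keeps falling back to the ORT_TENSORRT_* environment variables. Below is a minimal C# sketch of the two configuration paths, using only the API added in patches 01 and 09; the option values and the TrtConfigSketch wrapper are illustrative, not part of the patch series (patch 15 later moves the struct and GetDefaultTensorRTProviderOptions into ProviderOptions.cs, but the call pattern is unchanged):

using System;
using Microsoft.ML.OnnxRuntime;

static class TrtConfigSketch
{
    static void Main()
    {
        // Path 1: legacy configuration through ORT_TENSORRT_* environment variables,
        // still honored by the EP as long as has_trt_options == 0.
        Environment.SetEnvironmentVariable("ORT_TENSORRT_ENGINE_CACHE_ENABLE", "1");
        using (var fromEnv = SessionOptions.MakeSessionOptionWithTensorrtProvider(0))
        {
            // new InferenceSession(modelPath, fromEnv) as usual
        }

        // Path 2: explicit provider options; setting has_trt_options = 1 makes the EP use
        // these values instead of the environment variables (values below are illustrative).
        var trtOptions = SessionOptions.GetDefaultTensorRTProviderOptions();
        trtOptions.has_trt_options = 1;
        trtOptions.trt_fp16_enable = 1;
        trtOptions.trt_engine_cache_enable = 1;
        trtOptions.trt_cache_path = "trt_cache";
        using (var fromStruct = SessionOptions.MakeSessionOptionWithTensorrtProvider(trtOptions))
        {
            // new InferenceSession(modelPath, fromStruct) as usual
        }
    }
}
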
From 1cb12c51efa9c349041088478dc4d432532f13c6 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Mon, 12 Apr 2021 02:57:23 -0700 Subject: [PATCH 15/16] refactor --- .../ProviderOptions.cs | 111 +++++++++++++++ .../SessionOptions.cs | 130 +++++++----------- .../InferenceTest.cs | 4 +- 3 files changed, 162 insertions(+), 83 deletions(-) create mode 100644 csharp/src/Microsoft.ML.OnnxRuntime/ProviderOptions.cs diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/ProviderOptions.cs b/csharp/src/Microsoft.ML.OnnxRuntime/ProviderOptions.cs new file mode 100644 index 0000000000000..402fc4ce8ada0 --- /dev/null +++ b/csharp/src/Microsoft.ML.OnnxRuntime/ProviderOptions.cs @@ -0,0 +1,111 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Runtime.InteropServices; + +namespace Microsoft.ML.OnnxRuntime +{ + /// + /// Provider options for TensorRT. + /// + // Example for setting: + // SessionOptions.OrtTensorRTProviderOptions trt_options; + // trt_options.device_id = 0; + // trt_options.has_trt_options = 1; + // trt_options.trt_max_workspace_size = (UIntPtr) (1<<30); + // trt_options.trt_fp16_enable = 1; + // trt_options.trt_int8_enable = 1; + // trt_options.trt_int8_calibration_table_name = "calibration.flatbuffers"; + // trt_options.trt_int8_use_native_calibration_table = 0; + public struct OrtTensorRTProviderOptions + { + public int device_id; //!< cuda device id. Default is 0. + public int has_trt_options; //!< override environment variables with following TensorRT settings at runtime. Default 0 = false, nonzero = true. + public UIntPtr trt_max_workspace_size; //!< maximum workspace size for TensorRT. ORT C++ DLL has this field to be the type of size_t, hence using UIntPtr for conversion. + public int trt_fp16_enable; //!< enable TensorRT FP16 precision. Default 0 = false, nonzero = true. + public int trt_int8_enable; //!< enable TensorRT INT8 precision. Default 0 = false, nonzero = true. + public String trt_int8_calibration_table_name; //!< TensorRT INT8 calibration table name. + public int trt_int8_use_native_calibration_table; //!< use native TensorRT generated calibration table. Default 0 = false, nonzero = true + public int trt_max_partition_iterations; //!< maximum number of iterations allowed in model partitioning for TensorRT. + public int trt_min_subgraph_size; //!< minimum node size in a subgraph after partitioning. + public int trt_dump_subgraphs; //!< dump the subgraphs that are transformed into TRT engines in onnx format to the filesystem. Default 0 = false, nonzero = true + public int trt_engine_cache_enable; //!< enable TensorRT engine caching. Default 0 = false, nonzero = true + public String trt_cache_path; //!< specify path for TensorRT engine and profile files if engine_cache_enable is enabled, or INT8 calibration table file if trt_int8_enable is enabled. + } + + public class ProviderOptions : SafeHandle + { + internal IntPtr Handle + { + get + { + return handle; + } + } + + #region Constructor and Factory methods + + /// + /// Constructs an empty ProviderOptions + /// + public ProviderOptions() + : base(IntPtr.Zero, true) + { + } + + #endregion + + #region Public Methods + + /// + /// Get TensorRT provider options with default setting. + /// + /// TRT provider options instance. 
+ public static OrtTensorRTProviderOptions GetDefaultTensorRTProviderOptions() + { + OrtTensorRTProviderOptions trt_options; + trt_options.device_id = 0; + trt_options.has_trt_options = 0; + trt_options.trt_max_workspace_size = (UIntPtr)(1 << 30); + trt_options.trt_fp16_enable = 0; + trt_options.trt_int8_enable = 0; + trt_options.trt_int8_calibration_table_name = ""; + trt_options.trt_int8_use_native_calibration_table = 0; + trt_options.trt_max_partition_iterations = 1000; + trt_options.trt_min_subgraph_size = 1; + trt_options.trt_dump_subgraphs = 0; + trt_options.trt_engine_cache_enable = 0; + trt_options.trt_cache_path = ""; + + return trt_options; + } + #endregion + + #region Public Properties + + /// + /// Overrides SafeHandle.IsInvalid + /// + /// returns true if handle is equal to Zero + public override bool IsInvalid { get { return handle == IntPtr.Zero; } } + + #endregion + + #region SafeHandle + /// + /// Overrides SafeHandle.ReleaseHandle() to properly dispose of + /// the native instance of SessionOptions + /// + /// always returns true + protected override bool ReleaseHandle() + { + handle = IntPtr.Zero; + return true; + } + + #endregion + } +} \ No newline at end of file diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs index c35aefca8d233..55f2a5d32f8b5 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs @@ -104,36 +104,7 @@ public static SessionOptions MakeSessionOptionWithTensorrtProvider(OrtTensorRTPr SessionOptions options = new SessionOptions(); OrtTensorRTProviderOptionsNative trt_options_native; - trt_options_native.device_id = trt_options.device_id; - trt_options_native.has_user_compute_stream = 0; - trt_options_native.user_compute_stream = IntPtr.Zero; - trt_options_native.has_trt_options = trt_options.has_trt_options; - if ((ulong)trt_options.trt_max_workspace_size > (1 << 30)) - { - trt_options_native.trt_max_workspace_size = (UIntPtr)(1 << 30); - } - else - { - trt_options_native.trt_max_workspace_size = trt_options.trt_max_workspace_size; - } - trt_options_native.trt_fp16_enable = trt_options.trt_fp16_enable; - trt_options_native.trt_int8_enable = trt_options.trt_int8_enable; - var tableNamePinned = GCHandle.Alloc(NativeOnnxValueHelper.StringToZeroTerminatedUtf8(trt_options.trt_int8_calibration_table_name), GCHandleType.Pinned); - using (var pinnedSettingsName = new PinnedGCHandle(tableNamePinned)) - { - trt_options_native.trt_int8_calibration_table_name = pinnedSettingsName.Pointer; - } - trt_options_native.trt_int8_use_native_calibration_table = trt_options.trt_int8_use_native_calibration_table; - trt_options_native.trt_max_partition_iterations = trt_options.trt_max_partition_iterations; - trt_options_native.trt_min_subgraph_size = trt_options.trt_min_subgraph_size; - trt_options_native.trt_dump_subgraphs = trt_options.trt_dump_subgraphs; - trt_options_native.trt_engine_cache_enable = trt_options.trt_engine_cache_enable; - var cachePathPinned = GCHandle.Alloc(NativeOnnxValueHelper.StringToZeroTerminatedUtf8(trt_options.trt_cache_path), GCHandleType.Pinned); - using (var pinnedSettingsName2 = new PinnedGCHandle(cachePathPinned)) - { - trt_options_native.trt_cache_path = pinnedSettingsName2.Pointer; - } - + trt_options_native = PrepareNativeTensorRTProviderOptions(trt_options); NativeApiStatus.VerifySuccess(NativeMethods.SessionOptionsAppendExecutionProvider_TensorRT(options.Handle, ref trt_options_native)); 
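 // TensorRT is registered first so it gets first pick of the graph; the CUDA and CPU
 // providers appended below act as fallbacks for any nodes TensorRT does not take.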
NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionOptionsAppendExecutionProvider_CUDA(options.Handle, trt_options.device_id)); @@ -246,6 +217,18 @@ public void AppendExecutionProvider_Tensorrt(int deviceId) NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionOptionsAppendExecutionProvider_Tensorrt(handle, deviceId)); } + /// + /// Use only if you have the onnxruntime package specific to this Execution Provider. + /// + /// Provider Options for TensorRT EP. + public void AppendExecutionProvider_Tensorrt(OrtTensorRTProviderOptions trt_options) + { + OrtTensorRTProviderOptionsNative trt_options_native; + trt_options_native = PrepareNativeTensorRTProviderOptions(trt_options); + + NativeApiStatus.VerifySuccess(NativeMethods.SessionOptionsAppendExecutionProvider_TensorRT(handle, ref trt_options_native)); + } + /// /// Use only if you have the onnxruntime package specific to this Execution Provider. /// @@ -392,28 +375,6 @@ public void AddFreeDimensionOverrideByName(string dimName, long dimValue) } } - /// - /// Get TensorRT provider options with default setting. - /// - /// TRT provider options instance. - public static OrtTensorRTProviderOptions GetDefaultTensorRTProviderOptions() - { - OrtTensorRTProviderOptions trt_options; - trt_options.device_id = 0; - trt_options.has_trt_options = 0; - trt_options.trt_max_workspace_size = (UIntPtr)(1 << 30); - trt_options.trt_fp16_enable = 0; - trt_options.trt_int8_enable = 0; - trt_options.trt_int8_calibration_table_name = ""; - trt_options.trt_int8_use_native_calibration_table = 0; - trt_options.trt_max_partition_iterations = 1000; - trt_options.trt_min_subgraph_size = 1; - trt_options.trt_dump_subgraphs = 0; - trt_options.trt_engine_cache_enable = 0; - trt_options.trt_cache_path = ""; - - return trt_options; - } #endregion internal IntPtr Handle @@ -681,35 +642,6 @@ public ExecutionMode ExecutionMode } private ExecutionMode _executionMode = ExecutionMode.ORT_SEQUENTIAL; - - /// - /// Provider options for TensorRT. - /// - // Example for setting: - // SessionOptions.OrtTensorRTProviderOptions trt_options; - // trt_options.device_id = 0; - // trt_options.has_trt_options = 1; - // trt_options.trt_max_workspace_size = (UIntPtr) (1<<30); - // trt_options.trt_fp16_enable = 1; - // trt_options.trt_int8_enable = 1; - // trt_options.trt_int8_calibration_table_name = "calibration.flatbuffers"; - // trt_options.trt_int8_use_native_calibration_table = 0; - public struct OrtTensorRTProviderOptions - { - public int device_id; //!< cuda device id. Default is 0. - public int has_trt_options; //!< override environment variables with following TensorRT settings at runtime. Default 0 = false, nonzero = true. - public UIntPtr trt_max_workspace_size; //!< maximum workspace size for TensorRT. ORT C++ DLL has this field to be the type of size_t, hence using UIntPtr for conversion. - public int trt_fp16_enable; //!< enable TensorRT FP16 precision. Default 0 = false, nonzero = true. - public int trt_int8_enable; //!< enable TensorRT INT8 precision. Default 0 = false, nonzero = true. - public String trt_int8_calibration_table_name; //!< TensorRT INT8 calibration table name. - public int trt_int8_use_native_calibration_table; //!< use native TensorRT generated calibration table. Default 0 = false, nonzero = true - public int trt_max_partition_iterations; //!< maximum number of iterations allowed in model partitioning for TensorRT. - public int trt_min_subgraph_size; //!< minimum node size in a subgraph after partitioning. 
- public int trt_dump_subgraphs; //!< dump the subgraphs that are transformed into TRT engines in onnx format to the filesystem. Default 0 = false, nonzero = true - public int trt_engine_cache_enable; //!< enable TensorRT engine caching. Default 0 = false, nonzero = true - public String trt_cache_path; //!< specify path for TensorRT engine and profile files if engine_cache_enable is enabled, or INT8 calibration table file if trt_int8_enable is enabled. - } - #endregion #region Private Methods @@ -763,6 +695,42 @@ private static bool CheckTensorrtExecutionProviderDLLs() return true; } + private static OrtTensorRTProviderOptionsNative PrepareNativeTensorRTProviderOptions(OrtTensorRTProviderOptions trt_options) + { + OrtTensorRTProviderOptionsNative trt_options_native; + trt_options_native.device_id = trt_options.device_id; + trt_options_native.has_user_compute_stream = 0; + trt_options_native.user_compute_stream = IntPtr.Zero; + trt_options_native.has_trt_options = trt_options.has_trt_options; + if ((ulong)trt_options.trt_max_workspace_size > (1 << 30)) + { + trt_options_native.trt_max_workspace_size = (UIntPtr)(1 << 30); + } + else + { + trt_options_native.trt_max_workspace_size = trt_options.trt_max_workspace_size; + } + trt_options_native.trt_fp16_enable = trt_options.trt_fp16_enable; + trt_options_native.trt_int8_enable = trt_options.trt_int8_enable; + var tableNamePinned = GCHandle.Alloc(NativeOnnxValueHelper.StringToZeroTerminatedUtf8(trt_options.trt_int8_calibration_table_name), GCHandleType.Pinned); + using (var pinnedSettingsName = new PinnedGCHandle(tableNamePinned)) + { + trt_options_native.trt_int8_calibration_table_name = pinnedSettingsName.Pointer; + } + trt_options_native.trt_int8_use_native_calibration_table = trt_options.trt_int8_use_native_calibration_table; + trt_options_native.trt_max_partition_iterations = trt_options.trt_max_partition_iterations; + trt_options_native.trt_min_subgraph_size = trt_options.trt_min_subgraph_size; + trt_options_native.trt_dump_subgraphs = trt_options.trt_dump_subgraphs; + trt_options_native.trt_engine_cache_enable = trt_options.trt_engine_cache_enable; + var cachePathPinned = GCHandle.Alloc(NativeOnnxValueHelper.StringToZeroTerminatedUtf8(trt_options.trt_cache_path), GCHandleType.Pinned); + using (var pinnedSettingsName2 = new PinnedGCHandle(cachePathPinned)) + { + trt_options_native.trt_cache_path = pinnedSettingsName2.Pointer; + } + + return trt_options_native; + } + #endregion #region SafeHandle diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs index d0f7a69ed4a3a..404161846a41b 100644 --- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs +++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs @@ -231,13 +231,13 @@ public void CanCreateAndDisposeSessionWithModelPath() #if USE_TENSORRT [Fact] - private void validateTensorRTProviderOptions() + private void TestTensorRTProviderOptions() { string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet.onnx"); string calTablPath = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet_calibration.flatbuffers"); //Environment.SetEnvironmentVariable("ORT_TENSORRT_ENGINE_CACHE_ENABLE", "1"); - SessionOptions.OrtTensorRTProviderOptions trt_options = SessionOptions.GetDefaultTensorRTProviderOptions(); + OrtTensorRTProviderOptions trt_options = ProviderOptions.GetDefaultTensorRTProviderOptions(); trt_options.device_id = 0; trt_options.trt_int8_calibration_table_name = 
calTablPath; trt_options.has_trt_options = 1; From 532c899adb94ed8eb2b3afc058b26b29106cb8ea Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Mon, 12 Apr 2021 03:09:43 -0700 Subject: [PATCH 16/16] add documentation --- csharp/src/Microsoft.ML.OnnxRuntime/ProviderOptions.cs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/ProviderOptions.cs b/csharp/src/Microsoft.ML.OnnxRuntime/ProviderOptions.cs index 402fc4ce8ada0..647e0c92a3cbb 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/ProviderOptions.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/ProviderOptions.cs @@ -36,6 +36,9 @@ public struct OrtTensorRTProviderOptions public String trt_cache_path; //!< specify path for TensorRT engine and profile files if engine_cache_enable is enabled, or INT8 calibration table file if trt_int8_enable is enabled. } + /// + /// Holds provider options configuration for creating an InferenceSession. + /// public class ProviderOptions : SafeHandle { internal IntPtr Handle