From 3ba51a231386276f819393f88db4dab62afe6db3 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Thu, 23 Dec 2021 19:19:44 +0000 Subject: [PATCH 01/30] add timing cache --- .../tensorrt/tensorrt_execution_provider.cc | 103 +++++++++++++++++- .../tensorrt/tensorrt_execution_provider.h | 3 + 2 files changed, 105 insertions(+), 1 deletion(-) diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index 3f0751151fd2a..a2df547c5869b 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -19,6 +19,9 @@ #include #include #include +#include +#include +#include #include "flatbuffers/idl.h" #include "ort_trt_int8_cal_table.fbs.h" @@ -264,6 +267,36 @@ bool SetDynamicRange(nvinfer1::INetworkDefinition& network, std::unordered_map loadTimingCacheFile(const std::string inFileName) +{ + std::ifstream iFile(inFileName, std::ios::in | std::ios::binary); + if (!iFile) + { + LOGS_DEFAULT(WARNING) << "[TensorRT EP] Could not read timing cache from: " << inFileName + << ". A new timing cache will be generated and written."; + return std::vector(); + } + iFile.seekg(0, std::ifstream::end); + size_t fsize = iFile.tellg(); + iFile.seekg(0, std::ifstream::beg); + std::vector content(fsize); + iFile.read(content.data(), fsize); + iFile.close(); + return content; +} + +inline void saveTimingCacheFile(const std::string outFileName, const nvinfer1::IHostMemory* blob) +{ + std::ofstream oFile(outFileName, std::ios::out | std::ios::binary); + if (!oFile) + { + LOGS_DEFAULT(WARNING) << "[TensorRT EP] Could not write timing cache to: " << outFileName; + return; + } + oFile.write((char*) blob->data(), blob->size()); + oFile.close(); +} } // namespace namespace google { @@ -519,6 +552,11 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv if (!force_sequential_engine_build_env.empty()) { force_sequential_engine_build_ = (std::stoi(force_sequential_engine_build_env) == 0 ? false : true); } + + const std::string timing_cache_enable_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kTimingCacheEnable); + if (!timing_cache_enable_env.empty()) { + timing_cache_enable_ = (std::stoi(timing_cache_enable_env) == 0 ? false : true); + } } // Validate setting @@ -1289,6 +1327,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector& fuse if (!has_dynamic_shape) { const std::string cache_path = GetCachePath(cache_path_, trt_node_name_with_precision); const std::string engine_cache_path = cache_path + ".engine"; + const std::string timing_cache_path = cache_path + ".timing"; std::ifstream engine_file(engine_cache_path, std::ios::binary | std::ios::in); if (engine_cache_enable_ && engine_file) { engine_file.seekg(0, std::ios::end); @@ -1331,10 +1370,29 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector& fuse } } + LOGS_DEFAULT(WARNING) << timing_cache_enable_; + LOGS_DEFAULT(WARNING) << timing_cache_path; + + // Load timing cache from file. 
Create a fresh cache if the file doesn't exist + std::unique_ptr timing_cache = nullptr; + if (timing_cache_enable_) { + std::vector loaded_timing_cache = loadTimingCacheFile(timing_cache_path); + timing_cache.reset(trt_config->createTimingCache(static_cast(loaded_timing_cache.data()), loaded_timing_cache.size())); + if (timing_cache == nullptr) { + return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, + "TensorRT EP could not create timing cache: " + timing_cache_path); + } + trt_config->setTimingCache(*timing_cache, false); + } + // Build engine { auto lock = GetEngineBuildLock(); + auto start = std::chrono::high_resolution_clock::now(); trt_engine = tensorrt_ptr::unique_pointer(trt_builder->buildEngineWithConfig(*trt_network, *trt_config)); + auto end = std::chrono::high_resolution_clock::now(); + std::chrono::duration duration = end - start; + LOGS_DEFAULT(WARNING) << "Elapsed time (in Compile) in milliseconds: " << duration.count(); } if (trt_engine == nullptr) { return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, @@ -1356,6 +1414,18 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector& fuse serializedModel->destroy(); LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Serialized " + engine_cache_path; } + + // serialize and save timing cache + if (timing_cache_enable_) + { + auto timing_cache = trt_config->getTimingCache(); + std::unique_ptr timingCacheHostData{timing_cache->serialize()}; + if (timingCacheHostData == nullptr) { + return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, + "TensorRT EP could not serialize timing cache: " + timing_cache_path); + } + saveTimingCacheFile(timing_cache_path, timingCacheHostData.get()); + } } // Build context @@ -1409,7 +1479,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector& fuse &networks_[context->node_name], input_info_[context->node_name], output_info_[context->node_name], input_shape_ranges_[context->node_name], &tensorrt_mu_, fp16_enable_, int8_enable_, int8_calibration_cache_available_, dla_enable_, dla_core_, &max_workspace_size_, trt_node_name_with_precision, engine_cache_enable_, cache_path_, - runtime_.get(), nullptr, allocator_, dynamic_range_map, engine_decryption_enable_, engine_decryption_, engine_encryption_}; + runtime_.get(), nullptr, allocator_, dynamic_range_map, engine_decryption_enable_, engine_decryption_, engine_encryption_, timing_cache_enable_}; *state = p.release(); return 0; }; @@ -1445,6 +1515,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector& fuse const std::string cache_path = GetCachePath(trt_state->engine_cache_path, trt_state->trt_node_name_with_precision); const std::string engine_cache_path = cache_path + ".engine"; const std::string profile_cache_path = cache_path + ".profile"; + const std::string timing_cache_path = cache_path + ".timing"; if (trt_state->engine_cache_enable && trt_engine == nullptr) { std::ifstream engine_file(engine_cache_path, std::ios::binary | std::ios::in); std::ifstream profile_file(profile_cache_path, std::ios::binary | std::ios::in); @@ -1672,11 +1743,29 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector& fuse trt_config->setDLACore(trt_state->dla_core); } + LOGS_DEFAULT(WARNING) << timing_cache_enable_; + LOGS_DEFAULT(WARNING) << timing_cache_path; + // Load timing cache from file. 
Create a fresh cache if the file doesn't exist + std::unique_ptr timing_cache = nullptr; + if (trt_state->timing_cache_enable) { + std::vector loaded_timing_cache = loadTimingCacheFile(timing_cache_path); + timing_cache.reset(trt_config->createTimingCache(static_cast(loaded_timing_cache.data()), loaded_timing_cache.size())); + if (timing_cache == nullptr) { + return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, + "TensorRT EP could not create timing cache: " + timing_cache_path); + } + trt_config->setTimingCache(*timing_cache, false); + } + // Build engine { auto lock = GetEngineBuildLock(); + auto start = std::chrono::high_resolution_clock::now(); *(trt_state->engine) = tensorrt_ptr::unique_pointer( trt_builder->buildEngineWithConfig(*trt_state->network->get(), *trt_config)); + auto end = std::chrono::high_resolution_clock::now(); + std::chrono::duration duration = end - start; + LOGS_DEFAULT(WARNING) << "Elapsed time (in compute_func) in milliseconds: " << duration.count(); } if (trt_state->engine == nullptr) { return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "TensorRT EP Failed to Build Engine."); @@ -1703,6 +1792,18 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector& fuse serializedModel->destroy(); } + // serialize and save timing cache + if (trt_state->timing_cache_enable) + { + auto timing_cache = trt_config->getTimingCache(); + std::unique_ptr timingCacheHostData{timing_cache->serialize()}; + if (timingCacheHostData == nullptr) { + return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, + "TensorRT EP could not serialize timing cache: " + timing_cache_path); + } + saveTimingCacheFile(timing_cache_path, timingCacheHostData.get()); + } + // Build context *(trt_state->context) = tensorrt_ptr::unique_pointer( trt_state->engine->get()->createExecutionContext()); diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h index ee759228ae1fa..e8bbd44ea961a 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h @@ -26,6 +26,7 @@ static const std::string kCachePath = "ORT_TENSORRT_CACHE_PATH"; static const std::string kDecryptionEnable = "ORT_TENSORRT_ENGINE_DECRYPTION_ENABLE"; static const std::string kDecryptionLibPath = "ORT_TENSORRT_ENGINE_DECRYPTION_LIB_PATH"; static const std::string kForceSequentialEngineBuild= "ORT_TENSORRT_FORCE_SEQUENTIAL_ENGINE_BUILD"; +static const std::string kTimingCacheEnable = "ORT_TENSORRT_TIMING_CACHE_ENABLE"; // Old env variable for backward compatibility static const std::string kEngineCachePath = "ORT_TENSORRT_ENGINE_CACHE_PATH"; } // namespace tensorrt_env_vars @@ -107,6 +108,7 @@ struct TensorrtFuncState { bool engine_decryption_enable; int (*engine_decryption)(const char*, char*, size_t*); int (*engine_encryption)(const char*, char*, size_t); + bool timing_cache_enable; }; // Logical device representation. 
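Note on usage (an editorial sketch, not part of the diffs above): this commit wires a TensorRT builder timing cache into engine build and gates it behind the new ORT_TENSORRT_TIMING_CACHE_ENABLE environment variable; later commits in this series expose the same switch as trt_timing_cache_enable on OrtTensorRTProviderOptionsV2 together with an AppendExecutionProvider_TensorRT_V2 C++ helper. The sketch below shows one minimal way an application might enable it once those pieces are in place. The helper name MakeTrtSessionOptions and the cache directory value are assumptions for illustration only; the numeric defaults mirror the values used in the perf-test changes later in this series.

    // Minimal sketch: enable the TensorRT timing cache via the V2 provider options.
    // Assumes the OrtTensorRTProviderOptionsV2 struct and the C++ wrapper
    // AppendExecutionProvider_TensorRT_V2 introduced by later commits in this series.
    #include "onnxruntime_cxx_api.h"
    #include "core/providers/tensorrt/tensorrt_provider_options.h"

    Ort::SessionOptions MakeTrtSessionOptions() {
      OrtTensorRTProviderOptionsV2 trt_options{};       // zero-initialize: pointers null, flags off
      trt_options.device_id = 0;
      trt_options.trt_max_partition_iterations = 1000;  // defaults mirrored from the perf-test setup
      trt_options.trt_min_subgraph_size = 1;
      trt_options.trt_max_workspace_size = 1 << 30;
      trt_options.trt_engine_cache_enable = 1;           // reuse serialized engines across runs
      trt_options.trt_engine_cache_path = "./trt_cache"; // illustrative cache directory
      trt_options.trt_timing_cache_enable = 1;           // the new option added by this series

      Ort::SessionOptions session_options;
      session_options.AppendExecutionProvider_TensorRT_V2(trt_options);
      return session_options;
    }

When the option is on, the provider serializes the cache next to the engine cache as "<node name with precision>.timing" under the configured cache path and reloads it on later builds, so only the first engine build pays the full kernel-timing cost.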
@@ -167,6 +169,7 @@ class TensorrtExecutionProvider : public IExecutionProvider { bool engine_decryption_enable_ = false; int (*engine_decryption_)(const char*, char*, size_t*); int (*engine_encryption_)(const char*, char*, size_t); + bool timing_cache_enable_ = false; std::unordered_map> parsers_; std::unordered_map> engines_; From f905d355c765f04b24dbd2dc3c1ca0d3c2a23686 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Thu, 6 Jan 2022 18:55:09 +0000 Subject: [PATCH 02/30] enable timing cache for test --- .../core/providers/tensorrt/tensorrt_execution_provider.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h index e8bbd44ea961a..46f90565f88a0 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h @@ -169,7 +169,7 @@ class TensorrtExecutionProvider : public IExecutionProvider { bool engine_decryption_enable_ = false; int (*engine_decryption_)(const char*, char*, size_t*); int (*engine_encryption_)(const char*, char*, size_t); - bool timing_cache_enable_ = false; + bool timing_cache_enable_ = true; std::unordered_map> parsers_; std::unordered_map> engines_; From 086ba0ef87fd269231842cc0c0733936599c6f83 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Mon, 10 Jan 2022 07:34:06 +0000 Subject: [PATCH 03/30] Make it only on Linux --- onnxruntime/test/providers/cpu/model_tests.cc | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc index fbdb421e5fa46..847c48b519fd9 100644 --- a/onnxruntime/test/providers/cpu/model_tests.cc +++ b/onnxruntime/test/providers/cpu/model_tests.cc @@ -990,7 +990,43 @@ ::std::vector<::std::basic_string> GetParameterStrings() { return v; } +auto GenerateCustomTestName = [](const ::testing::TestParamInfo& info) { + // use info.param here to generate the test suffix + std::basic_string name = info.param; + + // the original name here is the combination of provider name and model path name + // remove the trailing 'xxxxxxx/model.onnx' of name + if (name.size() > 11 && name.substr(name.size() - 11) == ORT_TSTR("/model.onnx")) { + name = name.substr(0, info.param.size() - 11); + } + // remove the trailing 'xxxxxx.onnx' of name + else if (name.size() > 5 && name.substr(name.size() - 5) == ORT_TSTR(".onnx")) { + name = name.substr(0, info.param.size() - 5); + } + + // Note: test name only accepts '_' and alphanumeric + // replace '/' with '_' since '_' + std::replace(name.begin(), name.end(), '/', '_'); + + // Note: test name only accepts '_' and alphanumeric + // remove '.' and '-' + char chars[] = ".-"; + for (unsigned int i = 0; i < strlen(chars); ++i) { + name.erase(std::remove(name.begin(), name.end(), chars[i]), name.end()); + } + + return name; +}; + +// The optional last argument is a function or functor that generates custom test name suffixes based on the test parameters. +// Specify the last argument to make test name more meaningful and clear instead of just the sequential number. +#ifdef _WIN32 +// Note: The return value of INSTANTIATE_TEST_SUITE_P accpets std::basic_string. We use wchar_t on Windows and will encounter error. +// So, we don't provide custom test name on Windows now. 
INSTANTIATE_TEST_SUITE_P(ModelTests, ModelTest, testing::ValuesIn(GetParameterStrings())); +#else +INSTANTIATE_TEST_SUITE_P(ModelTests, ModelTest, testing::ValuesIn(GetParameterStrings()), GenerateCustomTestName); +#endif } // namespace test } // namespace onnxruntime From 191424b563a7f020c971498a7a2555ddce84ef5e Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 14 Jan 2022 22:18:05 +0000 Subject: [PATCH 04/30] undo last commit --- onnxruntime/test/providers/cpu/model_tests.cc | 36 ------------------- 1 file changed, 36 deletions(-) diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc index 847c48b519fd9..fbdb421e5fa46 100644 --- a/onnxruntime/test/providers/cpu/model_tests.cc +++ b/onnxruntime/test/providers/cpu/model_tests.cc @@ -990,43 +990,7 @@ ::std::vector<::std::basic_string> GetParameterStrings() { return v; } -auto GenerateCustomTestName = [](const ::testing::TestParamInfo& info) { - // use info.param here to generate the test suffix - std::basic_string name = info.param; - - // the original name here is the combination of provider name and model path name - // remove the trailing 'xxxxxxx/model.onnx' of name - if (name.size() > 11 && name.substr(name.size() - 11) == ORT_TSTR("/model.onnx")) { - name = name.substr(0, info.param.size() - 11); - } - // remove the trailing 'xxxxxx.onnx' of name - else if (name.size() > 5 && name.substr(name.size() - 5) == ORT_TSTR(".onnx")) { - name = name.substr(0, info.param.size() - 5); - } - - // Note: test name only accepts '_' and alphanumeric - // replace '/' with '_' since '_' - std::replace(name.begin(), name.end(), '/', '_'); - - // Note: test name only accepts '_' and alphanumeric - // remove '.' and '-' - char chars[] = ".-"; - for (unsigned int i = 0; i < strlen(chars); ++i) { - name.erase(std::remove(name.begin(), name.end(), chars[i]), name.end()); - } - - return name; -}; - -// The optional last argument is a function or functor that generates custom test name suffixes based on the test parameters. -// Specify the last argument to make test name more meaningful and clear instead of just the sequential number. -#ifdef _WIN32 -// Note: The return value of INSTANTIATE_TEST_SUITE_P accpets std::basic_string. We use wchar_t on Windows and will encounter error. -// So, we don't provide custom test name on Windows now. 
INSTANTIATE_TEST_SUITE_P(ModelTests, ModelTest, testing::ValuesIn(GetParameterStrings())); -#else -INSTANTIATE_TEST_SUITE_P(ModelTests, ModelTest, testing::ValuesIn(GetParameterStrings()), GenerateCustomTestName); -#endif } // namespace test } // namespace onnxruntime From 4fe5a0a25f5cf47f728da5050607a2966351dda7 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 14 Jan 2022 23:57:51 +0000 Subject: [PATCH 05/30] add 'timing_cache_enable' tensorrt provider options --- .../tensorrt/tensorrt_provider_options.h | 5 +++- .../tensorrt_execution_provider_info.cc | 3 ++ .../tensorrt_execution_provider_info.h | 1 + .../tensorrt/tensorrt_provider_factory.cc | 6 ++-- .../core/session/provider_bridge_ort.cc | 30 +++++++++++++++++++ 5 files changed, 42 insertions(+), 3 deletions(-) diff --git a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h index 43b0b938f130b..9e063fc2d015e 100644 --- a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h +++ b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h @@ -5,7 +5,9 @@ /// /// Options for the TensorRT provider that are passed to SessionOptionsAppendExecutionProvider_TensorRT_V2. -/// Please note that this struct is identical to OrtTensorRTProviderOptions but only to be used internally. +/// Please note that this struct is *similar* to OrtTensorRTProviderOptions but only to be used internally. +/// Going forward, new trt provider options are to be supported via this struct and usage of the publicly defined +/// OrtTensorRTProviderOptions will be deprecated over time. /// User can only get the instance of OrtTensorRTProviderOptionsV2 via CreateTensorRTProviderOptions. /// struct OrtTensorRTProviderOptionsV2 { @@ -27,4 +29,5 @@ struct OrtTensorRTProviderOptionsV2 { int trt_engine_decryption_enable; // enable engine decryption. Default 0 = false, nonzero = true const char* trt_engine_decryption_lib_path; // specify engine decryption library path int trt_force_sequential_engine_build; // force building TensorRT engine sequentially. Default 0 = false, nonzero = true + int trt_timing_cache_enable; // enable TensorRT timing cache. 
Default 0 = false, nonzero = true }; diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc index cfc43350a210e..97b4466372870 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc @@ -27,6 +27,7 @@ constexpr const char* kCachePath = "trt_engine_cache_path"; constexpr const char* kDecryptionEnable = "trt_engine_decryption_enable"; constexpr const char* kDecryptionLibPath = "trt_engine_decryption_lib_path"; constexpr const char* kForceSequentialEngineBuild = "trt_force_sequential_engine_build"; +constexpr const char* kTimingCacheEnable = "trt_timing_cache_enable"; } // namespace provider_option_names } // namespace tensorrt @@ -63,6 +64,7 @@ TensorrtExecutionProviderInfo TensorrtExecutionProviderInfo::FromProviderOptions .AddAssignmentToReference(tensorrt::provider_option_names::kDecryptionEnable, info.engine_decryption_enable) .AddAssignmentToReference(tensorrt::provider_option_names::kDecryptionLibPath, info.engine_decryption_lib_path) .AddAssignmentToReference(tensorrt::provider_option_names::kForceSequentialEngineBuild, info.force_sequential_engine_build) + .AddAssignmentToReference(tensorrt::provider_option_names::kTimingCacheEnable, info.timing_cache_enable) .Parse(options)); return info; @@ -87,6 +89,7 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const TensorrtE {tensorrt::provider_option_names::kDecryptionEnable, MakeStringWithClassicLocale(info.engine_decryption_enable)}, {tensorrt::provider_option_names::kDecryptionLibPath, MakeStringWithClassicLocale(info.engine_decryption_lib_path)}, {tensorrt::provider_option_names::kForceSequentialEngineBuild, MakeStringWithClassicLocale(info.force_sequential_engine_build)}, + {tensorrt::provider_option_names::kTimingCacheEnable, MakeStringWithClassicLocale(info.timing_cache_enable)}, }; return options; } diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h index b6d879bf72558..e1bee9ec6f5ce 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h @@ -31,6 +31,7 @@ struct TensorrtExecutionProviderInfo { bool engine_decryption_enable{false}; std::string engine_decryption_lib_path{""}; bool force_sequential_engine_build{false}; + bool timing_cache_enable{false}; static TensorrtExecutionProviderInfo FromProviderOptions(const ProviderOptions& options); static ProviderOptions ToProviderOptions(const TensorrtExecutionProviderInfo& info); diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc index d65c91d88f60d..10ab50ad4f0f2 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc @@ -48,7 +48,7 @@ struct Tensorrt_Provider : Provider { } std::shared_ptr CreateExecutionProviderFactory(const void* provider_options) override { - auto& options = *reinterpret_cast(provider_options); + auto& options = *reinterpret_cast(provider_options); TensorrtExecutionProviderInfo info; info.device_id = options.device_id; info.has_user_compute_stream = options.has_user_compute_stream != 0; @@ -69,12 +69,13 @@ struct Tensorrt_Provider : Provider { 
info.engine_decryption_enable = options.trt_engine_decryption_enable != 0; info.engine_decryption_lib_path = options.trt_engine_decryption_lib_path == nullptr ? "" : options.trt_engine_decryption_lib_path; info.force_sequential_engine_build = options.trt_force_sequential_engine_build != 0; + info.timing_cache_enable = options.trt_timing_cache_enable; return std::make_shared(info); } void UpdateProviderOptions(void* provider_options, const ProviderOptions& options) override { auto internal_options = onnxruntime::TensorrtExecutionProviderInfo::FromProviderOptions(options); - auto& trt_options = *reinterpret_cast(provider_options); + auto& trt_options = *reinterpret_cast(provider_options); trt_options.device_id = internal_options.device_id; trt_options.trt_max_partition_iterations = internal_options.max_partition_iterations; trt_options.trt_min_subgraph_size = internal_options.min_subgraph_size; @@ -134,6 +135,7 @@ struct Tensorrt_Provider : Provider { } trt_options.trt_force_sequential_engine_build = internal_options.force_sequential_engine_build; + trt_options.trt_timing_cache_enable = internal_options.timing_cache_enable; } ProviderOptions GetProviderOptions(const void* provider_options) override { diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 36ebf32f0499f..b2e69bb4c94f3 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1155,7 +1155,36 @@ std::shared_ptr CreateExecutionProviderFactory_MIGrap return nullptr; } +// Adapter to convert the legacy OrtTensorRTProviderOptions to the latest OrtTensorRTProviderOptionsV2 +OrtTensorRTProviderOptionsV2 OrtTensorRTProviderOptionsToOrtTensorRTProviderOptionsV2(const OrtTensorRTProviderOptions* legacy_trt_options) { + OrtTensorRTProviderOptionsV2 trt_options_converted; + + trt_options_converted.device_id = legacy_trt_options->device_id; + trt_options_converted.has_user_compute_stream = legacy_trt_options->has_user_compute_stream; + trt_options_converted.user_compute_stream = legacy_trt_options->user_compute_stream; + trt_options_converted.trt_max_partition_iterations = legacy_trt_options->trt_max_partition_iterations; + trt_options_converted.trt_min_subgraph_size = legacy_trt_options->trt_min_subgraph_size; + trt_options_converted.trt_max_workspace_size = legacy_trt_options->trt_max_workspace_size; + trt_options_converted.trt_fp16_enable = legacy_trt_options->trt_fp16_enable; + trt_options_converted.trt_int8_enable = legacy_trt_options->trt_int8_enable; + trt_options_converted.trt_int8_calibration_table_name = legacy_trt_options->trt_int8_calibration_table_name; + trt_options_converted.trt_int8_use_native_calibration_table = legacy_trt_options->trt_int8_use_native_calibration_table; + trt_options_converted.trt_dla_enable = legacy_trt_options->trt_dla_enable; + trt_options_converted.trt_dla_core = legacy_trt_options->trt_dla_core; + trt_options_converted.trt_dump_subgraphs = legacy_trt_options->trt_dump_subgraphs; + trt_options_converted.trt_engine_cache_enable = legacy_trt_options->trt_engine_cache_enable; + trt_options_converted.trt_engine_cache_path = legacy_trt_options->trt_engine_cache_path; + trt_options_converted.trt_engine_decryption_enable = legacy_trt_options->trt_engine_decryption_enable; + trt_options_converted.trt_engine_decryption_lib_path = legacy_trt_options->trt_engine_decryption_lib_path; + trt_options_converted.trt_force_sequential_engine_build = 
legacy_trt_options->trt_force_sequential_engine_build; + // Use default value as this field is not available in OrtTensorRTProviderOptionsV2 + trt_options_converted.trt_timing_cache_enalbed = 0; + + return trt_options_converted; +} + std::shared_ptr CreateExecutionProviderFactory_Tensorrt(const OrtTensorRTProviderOptions* provider_options) { + OrtTensorRTProviderOptionsV2 trt_options_converted = onnxruntime::OrtTensorRTProviderOptionsToOrtTensorRTProviderOptionsV2(provider_options); if (auto* provider = s_library_tensorrt.Get()) return provider->CreateExecutionProviderFactory(provider_options); @@ -1466,6 +1495,7 @@ ORT_API_STATUS_IMPL(OrtApis::CreateTensorRTProviderOptions, _Outptr_ OrtTensorRT (*out)->trt_engine_decryption_enable = false; (*out)->trt_engine_decryption_lib_path = nullptr; (*out)->trt_force_sequential_engine_build = false; + (*out)->trt_timing_cache_enable = false; return nullptr; #else ORT_UNUSED_PARAMETER(out); From 83e251d68d7f9260ebda428efc67299a89d027b7 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Sat, 15 Jan 2022 00:05:00 +0000 Subject: [PATCH 06/30] fix bug --- onnxruntime/core/session/provider_bridge_ort.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index b2e69bb4c94f3..9ec828d28cd01 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1178,7 +1178,7 @@ OrtTensorRTProviderOptionsV2 OrtTensorRTProviderOptionsToOrtTensorRTProviderOpti trt_options_converted.trt_engine_decryption_lib_path = legacy_trt_options->trt_engine_decryption_lib_path; trt_options_converted.trt_force_sequential_engine_build = legacy_trt_options->trt_force_sequential_engine_build; // Use default value as this field is not available in OrtTensorRTProviderOptionsV2 - trt_options_converted.trt_timing_cache_enalbed = 0; + trt_options_converted.trt_timing_cache_enable = 0; return trt_options_converted; } @@ -1186,7 +1186,7 @@ OrtTensorRTProviderOptionsV2 OrtTensorRTProviderOptionsToOrtTensorRTProviderOpti std::shared_ptr CreateExecutionProviderFactory_Tensorrt(const OrtTensorRTProviderOptions* provider_options) { OrtTensorRTProviderOptionsV2 trt_options_converted = onnxruntime::OrtTensorRTProviderOptionsToOrtTensorRTProviderOptionsV2(provider_options); if (auto* provider = s_library_tensorrt.Get()) - return provider->CreateExecutionProviderFactory(provider_options); + return provider->CreateExecutionProviderFactory(&trt_options_converted); return nullptr; } From 5b52f63e2790050d55ad0610bbc65025ca7ef01d Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Sat, 15 Jan 2022 00:16:33 +0000 Subject: [PATCH 07/30] fix bug --- .../tensorrt/tensorrt_provider_factory.cc | 41 ++++++++++--------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc index 10ab50ad4f0f2..12ec9f51d6eb6 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc @@ -6,6 +6,7 @@ #include #include "tensorrt_execution_provider.h" #include "core/framework/provider_options.h" +#include "core/providers/tensorrt/tensorrt_provider_options.h" #include using namespace onnxruntime; @@ -48,28 +49,28 @@ struct Tensorrt_Provider : Provider { } std::shared_ptr CreateExecutionProviderFactory(const void* provider_options) override { - auto& 
options = *reinterpret_cast(provider_options); + auto options = reinterpret_cast(provider_options); TensorrtExecutionProviderInfo info; - info.device_id = options.device_id; - info.has_user_compute_stream = options.has_user_compute_stream != 0; - info.user_compute_stream = options.user_compute_stream; + info.device_id = options->device_id; + info.has_user_compute_stream = options->has_user_compute_stream != 0; + info.user_compute_stream = options->user_compute_stream; info.has_trt_options = true; - info.max_partition_iterations = options.trt_max_partition_iterations; - info.min_subgraph_size = options.trt_min_subgraph_size; - info.max_workspace_size = options.trt_max_workspace_size; - info.fp16_enable = options.trt_fp16_enable != 0; - info.int8_enable = options.trt_int8_enable != 0; - info.int8_calibration_table_name = options.trt_int8_calibration_table_name == nullptr ? "" : options.trt_int8_calibration_table_name; - info.int8_use_native_calibration_table = options.trt_int8_use_native_calibration_table != 0; - info.dla_enable = options.trt_dla_enable != 0; - info.dla_core = options.trt_dla_core; - info.dump_subgraphs = options.trt_dump_subgraphs != 0; - info.engine_cache_enable = options.trt_engine_cache_enable != 0; - info.engine_cache_path = options.trt_engine_cache_path == nullptr ? "" : options.trt_engine_cache_path; - info.engine_decryption_enable = options.trt_engine_decryption_enable != 0; - info.engine_decryption_lib_path = options.trt_engine_decryption_lib_path == nullptr ? "" : options.trt_engine_decryption_lib_path; - info.force_sequential_engine_build = options.trt_force_sequential_engine_build != 0; - info.timing_cache_enable = options.trt_timing_cache_enable; + info.max_partition_iterations = options->trt_max_partition_iterations; + info.min_subgraph_size = options->trt_min_subgraph_size; + info.max_workspace_size = options->trt_max_workspace_size; + info.fp16_enable = options->trt_fp16_enable != 0; + info.int8_enable = options->trt_int8_enable != 0; + info.int8_calibration_table_name = options->trt_int8_calibration_table_name == nullptr ? "" : options->trt_int8_calibration_table_name; + info.int8_use_native_calibration_table = options->trt_int8_use_native_calibration_table != 0; + info.dla_enable = options->trt_dla_enable != 0; + info.dla_core = options->trt_dla_core; + info.dump_subgraphs = options->trt_dump_subgraphs != 0; + info.engine_cache_enable = options->trt_engine_cache_enable != 0; + info.engine_cache_path = options->trt_engine_cache_path == nullptr ? "" : options->trt_engine_cache_path; + info.engine_decryption_enable = options->trt_engine_decryption_enable != 0; + info.engine_decryption_lib_path = options->trt_engine_decryption_lib_path == nullptr ? 
"" : options->trt_engine_decryption_lib_path; + info.force_sequential_engine_build = options->trt_force_sequential_engine_build != 0; + info.timing_cache_enable = options->trt_timing_cache_enable; return std::make_shared(info); } From 4ff502d482c4c26eac8578f0cb9a8b730881e178 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Sat, 15 Jan 2022 00:19:28 +0000 Subject: [PATCH 08/30] revert modification --- .../tensorrt/tensorrt_provider_factory.cc | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc index 12ec9f51d6eb6..80772436c9e79 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc @@ -49,28 +49,28 @@ struct Tensorrt_Provider : Provider { } std::shared_ptr CreateExecutionProviderFactory(const void* provider_options) override { - auto options = reinterpret_cast(provider_options); + auto& options = *reinterpret_cast(provider_options); TensorrtExecutionProviderInfo info; - info.device_id = options->device_id; - info.has_user_compute_stream = options->has_user_compute_stream != 0; - info.user_compute_stream = options->user_compute_stream; + info.device_id = options.device_id; + info.has_user_compute_stream = options.has_user_compute_stream != 0; + info.user_compute_stream = options.user_compute_stream; info.has_trt_options = true; - info.max_partition_iterations = options->trt_max_partition_iterations; - info.min_subgraph_size = options->trt_min_subgraph_size; - info.max_workspace_size = options->trt_max_workspace_size; - info.fp16_enable = options->trt_fp16_enable != 0; - info.int8_enable = options->trt_int8_enable != 0; - info.int8_calibration_table_name = options->trt_int8_calibration_table_name == nullptr ? "" : options->trt_int8_calibration_table_name; - info.int8_use_native_calibration_table = options->trt_int8_use_native_calibration_table != 0; - info.dla_enable = options->trt_dla_enable != 0; - info.dla_core = options->trt_dla_core; - info.dump_subgraphs = options->trt_dump_subgraphs != 0; - info.engine_cache_enable = options->trt_engine_cache_enable != 0; - info.engine_cache_path = options->trt_engine_cache_path == nullptr ? "" : options->trt_engine_cache_path; - info.engine_decryption_enable = options->trt_engine_decryption_enable != 0; - info.engine_decryption_lib_path = options->trt_engine_decryption_lib_path == nullptr ? "" : options->trt_engine_decryption_lib_path; - info.force_sequential_engine_build = options->trt_force_sequential_engine_build != 0; - info.timing_cache_enable = options->trt_timing_cache_enable; + info.max_partition_iterations = options.trt_max_partition_iterations; + info.min_subgraph_size = options.trt_min_subgraph_size; + info.max_workspace_size = options.trt_max_workspace_size; + info.fp16_enable = options.trt_fp16_enable != 0; + info.int8_enable = options.trt_int8_enable != 0; + info.int8_calibration_table_name = options.trt_int8_calibration_table_name == nullptr ? "" : options.trt_int8_calibration_table_name; + info.int8_use_native_calibration_table = options.trt_int8_use_native_calibration_table != 0; + info.dla_enable = options.trt_dla_enable != 0; + info.dla_core = options.trt_dla_core; + info.dump_subgraphs = options.trt_dump_subgraphs != 0; + info.engine_cache_enable = options.trt_engine_cache_enable != 0; + info.engine_cache_path = options.trt_engine_cache_path == nullptr ? 
"" : options.trt_engine_cache_path; + info.engine_decryption_enable = options.trt_engine_decryption_enable != 0; + info.engine_decryption_lib_path = options.trt_engine_decryption_lib_path == nullptr ? "" : options.trt_engine_decryption_lib_path; + info.force_sequential_engine_build = options.trt_force_sequential_engine_build != 0; + info.timing_cache_enable = options.trt_timing_cache_enable; return std::make_shared(info); } From 2dd319437f2ba4cd86e1c277a2371a336f70687e Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Sat, 15 Jan 2022 00:33:50 +0000 Subject: [PATCH 09/30] small modification --- .../core/providers/tensorrt/tensorrt_execution_provider.cc | 1 + .../core/providers/tensorrt/tensorrt_execution_provider.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index 4e0b35152b549..d1b2e8895759f 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -468,6 +468,7 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv engine_decryption_lib_path_ = info.engine_decryption_lib_path; } force_sequential_engine_build_ = info.force_sequential_engine_build; + timing_cache_enable_ = info.timing_cache_enable; } else { const std::string max_partition_iterations_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kMaxPartitionIterations); if (!max_partition_iterations_env.empty()) { diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h index 46f90565f88a0..e8bbd44ea961a 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h @@ -169,7 +169,7 @@ class TensorrtExecutionProvider : public IExecutionProvider { bool engine_decryption_enable_ = false; int (*engine_decryption_)(const char*, char*, size_t*); int (*engine_encryption_)(const char*, char*, size_t); - bool timing_cache_enable_ = true; + bool timing_cache_enable_ = false; std::unordered_map> parsers_; std::unordered_map> engines_; From 8d75a7094c44b3ed6f604b81d65dfcf8948745de Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Sat, 15 Jan 2022 00:38:27 +0000 Subject: [PATCH 10/30] remove intrumentation code for recording engine build latency --- .../tensorrt/tensorrt_execution_provider.cc | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index d1b2e8895759f..f14b5b418ed57 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -19,9 +19,6 @@ #include #include #include -#include -#include -#include #include "flatbuffers/idl.h" #include "ort_trt_int8_cal_table.fbs.h" @@ -1384,9 +1381,6 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector& fuse } } - LOGS_DEFAULT(WARNING) << timing_cache_enable_; - LOGS_DEFAULT(WARNING) << timing_cache_path; - // Load timing cache from file. 
Create a fresh cache if the file doesn't exist std::unique_ptr timing_cache = nullptr; if (timing_cache_enable_) { @@ -1402,11 +1396,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector& fuse // Build engine { auto lock = GetEngineBuildLock(); - auto start = std::chrono::high_resolution_clock::now(); trt_engine = tensorrt_ptr::unique_pointer(trt_builder->buildEngineWithConfig(*trt_network, *trt_config)); - auto end = std::chrono::high_resolution_clock::now(); - std::chrono::duration duration = end - start; - LOGS_DEFAULT(WARNING) << "Elapsed time (in Compile) in milliseconds: " << duration.count(); } if (trt_engine == nullptr) { return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, @@ -1757,8 +1747,6 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector& fuse trt_config->setDLACore(trt_state->dla_core); } - LOGS_DEFAULT(WARNING) << timing_cache_enable_; - LOGS_DEFAULT(WARNING) << timing_cache_path; // Load timing cache from file. Create a fresh cache if the file doesn't exist std::unique_ptr timing_cache = nullptr; if (trt_state->timing_cache_enable) { @@ -1774,12 +1762,8 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector& fuse // Build engine { auto lock = GetEngineBuildLock(); - auto start = std::chrono::high_resolution_clock::now(); *(trt_state->engine) = tensorrt_ptr::unique_pointer( trt_builder->buildEngineWithConfig(*trt_state->network->get(), *trt_config)); - auto end = std::chrono::high_resolution_clock::now(); - std::chrono::duration duration = end - start; - LOGS_DEFAULT(WARNING) << "Elapsed time (in compute_func) in milliseconds: " << duration.count(); } if (trt_state->engine == nullptr) { return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "TensorRT EP Failed to Build Engine."); From 58e37fc17748578b9edeee6cde21b24024b382a6 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Sat, 15 Jan 2022 21:34:59 +0000 Subject: [PATCH 11/30] add timing_cache_enable as additional member of internal TensorRT provider options struct --- .../core/session/onnxruntime_cxx_api.h | 1 + .../core/session/onnxruntime_cxx_inline.h | 5 +++++ onnxruntime/core/session/provider_bridge_ort.cc | 17 ++++++++++++++++- onnxruntime/test/perftest/ort_test_session.cc | 17 ++++++++++++++--- 4 files changed, 36 insertions(+), 4 deletions(-) diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h index 12370aafa80d4..e9ee68b8032fe 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h @@ -351,6 +351,7 @@ struct SessionOptions : Base { SessionOptions& AppendExecutionProvider_ROCM(const OrtROCMProviderOptions& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_ROCM SessionOptions& AppendExecutionProvider_OpenVINO(const OrtOpenVINOProviderOptions& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_OpenVINO SessionOptions& AppendExecutionProvider_TensorRT(const OrtTensorRTProviderOptions& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_TensorRT + SessionOptions& AppendExecutionProvider_TensorRT_V2(const OrtTensorRTProviderOptionsV2& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_TensorRT SessionOptions& AppendExecutionProvider_MIGraphX(const OrtMIGraphXProviderOptions& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_MIGraphX SessionOptions& SetCustomCreateThreadFn(OrtCustomCreateThreadFn 
ort_custom_create_thread_fn); ///< Wraps OrtApi::SessionOptionsSetCustomCreateThreadFn diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h index d281bb5542797..063acb1702a84 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h @@ -518,6 +518,11 @@ inline SessionOptions& SessionOptions::AppendExecutionProvider_TensorRT(const Or return *this; } +inline SessionOptions& SessionOptions::AppendExecutionProvider_TensorRT_V2(const OrtTensorRTProviderOptionsV2& provider_options) { + ThrowOnError(GetApi().SessionOptionsAppendExecutionProvider_TensorRT_V2(p_, &provider_options)); + return *this; +} + inline SessionOptions& SessionOptions::AppendExecutionProvider_MIGraphX(const OrtMIGraphXProviderOptions& provider_options) { ThrowOnError(GetApi().SessionOptionsAppendExecutionProvider_MIGraphX(p_, &provider_options)); return *this; diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 9ec828d28cd01..7088df707a2af 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1191,6 +1191,13 @@ std::shared_ptr CreateExecutionProviderFactory_Tensor return nullptr; } +std::shared_ptr CreateExecutionProviderFactory_Tensorrt(const OrtTensorRTProviderOptionsV2* provider_options) { + if (auto* provider = s_library_tensorrt.Get()) + return provider->CreateExecutionProviderFactory(provider_options); + + return nullptr; +} + std::shared_ptr CreateExecutionProviderFactory_MIGraphX(const OrtMIGraphXProviderOptions* provider_options) { if (auto* provider = s_library_migraphx.Get()) return provider->CreateExecutionProviderFactory(provider_options); @@ -1470,7 +1477,15 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_ROCM, _In_ Or } ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT_V2, _In_ OrtSessionOptions* options, _In_ const OrtTensorRTProviderOptionsV2* tensorrt_options) { - return OrtApis::SessionOptionsAppendExecutionProvider_TensorRT(options, reinterpret_cast(tensorrt_options)); + API_IMPL_BEGIN + auto factory = onnxruntime::CreateExecutionProviderFactory_Tensorrt(tensorrt_options); + if (!factory) { + return OrtApis::CreateStatus(ORT_FAIL, "OrtSessionOptionsAppendExecutionProvider_TensorRT: Failed to load shared library"); + } + + options->provider_factories.push_back(factory); + return nullptr; + API_IMPL_END } ORT_API_STATUS_IMPL(OrtApis::CreateTensorRTProviderOptions, _Outptr_ OrtTensorRTProviderOptionsV2** out) { diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index 551709ea621cd..866494d992006 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -1,6 +1,7 @@ #include "ort_test_session.h" #include #include "core/session/onnxruntime_session_options_config_keys.h" +#include "core/providers/tensorrt/tensorrt_provider_options.h" #include #include "providers.h" #include "TestCase.h" @@ -74,6 +75,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device bool trt_engine_decryption_enable = false; std::string trt_engine_decryption_lib_path = ""; bool trt_force_sequential_engine_build = false; + bool trt_timing_cache_enable = false; #ifdef _MSC_VER std::string ov_string = ToMBString(performance_test_config.run_config.ep_runtime_config_string); @@ 
-205,11 +207,19 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device } else { ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_force_sequential_engine_build' should be a boolean i.e. true or false. Default value is false.\n"); } + } else if (key == "trt_timing_cache_enable") { + if (value == "true" || value == "True") { + trt_timing_cache_enable = true; + } else if (value == "false" || value == "False") { + trt_timing_cache_enable = false; + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_timing_cache_enable' should be a boolean i.e. true or false. Default value is false.\n"); + } } else { - ORT_THROW("[ERROR] [TensorRT] wrong key type entered. Choose from the following runtime key options that are available for TensorRT. ['device_id', 'trt_max_partition_iterations', 'trt_min_subgraph_size', 'trt_max_workspace_size', 'trt_fp16_enable', 'trt_int8_enable', 'trt_int8_calibration_table_name', 'trt_int8_use_native_calibration_table', 'trt_dla_enable', 'trt_dla_core', 'trt_dump_subgraphs', 'trt_engine_cache_enable', 'trt_engine_cache_path', 'trt_engine_decryption_enable', 'trt_engine_decryption_lib_path', 'trt_force_sequential_engine_build'] \n"); + ORT_THROW("[ERROR] [TensorRT] wrong key type entered. Choose from the following runtime key options that are available for TensorRT. ['device_id', 'trt_max_partition_iterations', 'trt_min_subgraph_size', 'trt_max_workspace_size', 'trt_fp16_enable', 'trt_int8_enable', 'trt_int8_calibration_table_name', 'trt_int8_use_native_calibration_table', 'trt_dla_enable', 'trt_dla_core', 'trt_dump_subgraphs', 'trt_engine_cache_enable', 'trt_engine_cache_path', 'trt_engine_decryption_enable', 'trt_engine_decryption_lib_path', 'trt_force_sequential_engine_build', 'trt_timing_cache_enable'] \n"); } } - OrtTensorRTProviderOptions tensorrt_options; + OrtTensorRTProviderOptionsV2 tensorrt_options; tensorrt_options.device_id = device_id; tensorrt_options.has_user_compute_stream = 0; tensorrt_options.user_compute_stream = nullptr; @@ -228,7 +238,8 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device tensorrt_options.trt_engine_decryption_enable = trt_engine_decryption_enable; tensorrt_options.trt_engine_decryption_lib_path = trt_engine_decryption_lib_path.c_str(); tensorrt_options.trt_force_sequential_engine_build = trt_force_sequential_engine_build; - session_options.AppendExecutionProvider_TensorRT(tensorrt_options); + tensorrt_options.trt_timing_cache_enable = trt_timing_cache_enable; + session_options.AppendExecutionProvider_TensorRT_V2(tensorrt_options); OrtCUDAProviderOptions cuda_options; cuda_options.device_id=device_id; From b707c65d43021eb7a41885abba59844f6bba6a69 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Mon, 17 Jan 2022 21:00:33 +0000 Subject: [PATCH 12/30] fix warning --- .../core/providers/tensorrt/tensorrt_provider_factory.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc index 80772436c9e79..6e8f8be6f5cd7 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc @@ -70,7 +70,7 @@ struct Tensorrt_Provider : Provider { info.engine_decryption_enable = options.trt_engine_decryption_enable != 0; info.engine_decryption_lib_path = options.trt_engine_decryption_lib_path == nullptr ? 
"" : options.trt_engine_decryption_lib_path; info.force_sequential_engine_build = options.trt_force_sequential_engine_build != 0; - info.timing_cache_enable = options.trt_timing_cache_enable; + info.timing_cache_enable = options.trt_timing_cache_enable != 0; return std::make_shared(info); } From 48ecbeb563b17b995229545ce913f60221d8d156 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 28 Jan 2022 23:23:55 +0000 Subject: [PATCH 13/30] enable trt timing cache for model tests --- onnxruntime/test/providers/cpu/model_tests.cc | 20 +++++++++++++------ onnxruntime/test/util/default_providers.cc | 10 ++++++++++ .../test/util/include/default_providers.h | 1 + 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc index fbdb421e5fa46..59af6ea232522 100644 --- a/onnxruntime/test/providers/cpu/model_tests.cc +++ b/onnxruntime/test/providers/cpu/model_tests.cc @@ -18,6 +18,7 @@ #include "test/onnx/heap_buffer.h" #include "test/onnx/onnx_model_info.h" #include "test/onnx/callback.h" +#include "core/providers/tensorrt/tensorrt_provider_options.h" extern std::unique_ptr ort_env; @@ -587,15 +588,14 @@ TEST_P(ModelTest, Run) { } else if (provider_name == "nuphar") { ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(DefaultNupharExecutionProvider())); } else if (provider_name == "tensorrt") { - if (test_case_name.find(ORT_TSTR("FLOAT16")) != std::string::npos) { - OrtTensorRTProviderOptions params{ + OrtTensorRTProviderOptionsV2 params{ 0, 0, nullptr, 1000, 1, 1 << 30, - 1, // enable fp16 + 0, 0, nullptr, 0, @@ -603,13 +603,21 @@ TEST_P(ModelTest, Run) { 0, 0, 0, - nullptr, +#ifdef _WIN32 + "C:\\local\\trt_timing_cache", // directory where timing caches locate in CI Windows image +#else + "/data/trt_timing_cache", // directory where timing caches locate in CI Linux image +#endif 0, nullptr, - 0}; + 0, + 1 // enable trt timing cache to reduce CI testing time for trt ep + }; + if (test_case_name.find(ORT_TSTR("FLOAT16")) != std::string::npos) { + params.trt_fp16_enable = 1; ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(TensorrtExecutionProviderWithOptions(¶ms))); } else { - ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(DefaultTensorrtExecutionProvider())); + ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(DefaultTensorrtExecutionProvider(¶ms))); } ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(DefaultCudaExecutionProvider())); } else if (provider_name == "migraphx") { diff --git a/onnxruntime/test/util/default_providers.cc b/onnxruntime/test/util/default_providers.cc index 209d4244229fc..5e0975d667fb9 100644 --- a/onnxruntime/test/util/default_providers.cc +++ b/onnxruntime/test/util/default_providers.cc @@ -54,6 +54,16 @@ std::unique_ptr TensorrtExecutionProviderWithOptions(const O return nullptr; } +std::unique_ptr TensorrtExecutionProviderWithOptions(const OrtTensorRTProviderOptionsV2* params) { +#ifdef USE_TENSORRT + if (auto factory = CreateExecutionProviderFactory_Tensorrt(params)) + return factory->CreateProvider(); +#else + ORT_UNUSED_PARAMETER(params); +#endif + return nullptr; +} + std::unique_ptr DefaultMIGraphXExecutionProvider() { #ifdef USE_MIGRAPHX OrtMIGraphXProviderOptions params{ diff --git a/onnxruntime/test/util/include/default_providers.h b/onnxruntime/test/util/include/default_providers.h index 6fa50c61cdefa..04f25d1990126 100644 --- a/onnxruntime/test/util/include/default_providers.h +++ 
b/onnxruntime/test/util/include/default_providers.h @@ -24,6 +24,7 @@ std::shared_ptr CreateExecutionProviderFactory_OpenVI std::shared_ptr CreateExecutionProviderFactory_Rknpu(); std::shared_ptr CreateExecutionProviderFactory_Rocm(const OrtROCMProviderOptions* provider_options); std::shared_ptr CreateExecutionProviderFactory_Tensorrt(const OrtTensorRTProviderOptions* params); +std::shared_ptr CreateExecutionProviderFactory_Tensorrt(const OrtTensorRTProviderOptionsV2* params); // EP for internal testing std::shared_ptr CreateExecutionProviderFactory_InternalTesting(const std::unordered_set& supported_ops); From 9dc0d162e45f6ac4b850606672ddd356d1ff00a6 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Sat, 29 Jan 2022 18:05:47 +0000 Subject: [PATCH 14/30] enable timing cache for model tests --- .../tensorrt/tensorrt_execution_provider.cc | 17 +++++++++-------- onnxruntime/test/providers/cpu/model_tests.cc | 2 +- .../test/util/include/default_providers.h | 1 + 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index f14b5b418ed57..0db5a9eb17825 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -457,7 +457,8 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv } dump_subgraphs_ = info.dump_subgraphs; engine_cache_enable_ = info.engine_cache_enable; - if (engine_cache_enable_ || int8_enable_) { + timing_cache_enable_ = info.timing_cache_enable; + if (engine_cache_enable_ || int8_enable_ || timing_cache_enable_) { cache_path_ = info.engine_cache_path; } engine_decryption_enable_ = info.engine_decryption_enable; @@ -465,7 +466,6 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv engine_decryption_lib_path_ = info.engine_decryption_lib_path; } force_sequential_engine_build_ = info.force_sequential_engine_build; - timing_cache_enable_ = info.timing_cache_enable; } else { const std::string max_partition_iterations_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kMaxPartitionIterations); if (!max_partition_iterations_env.empty()) { @@ -528,7 +528,12 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv engine_cache_enable_ = (std::stoi(engine_cache_enable_env) == 0 ? false : true); } - if (engine_cache_enable_ || int8_enable_) { + const std::string timing_cache_enable_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kTimingCacheEnable); + if (!timing_cache_enable_env.empty()) { + timing_cache_enable_ = (std::stoi(timing_cache_enable_env) == 0 ? false : true); + } + + if (engine_cache_enable_ || int8_enable_ || timing_cache_enable_) { const std::string engine_cache_path = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kEngineCachePath); cache_path_ = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kCachePath); if (!engine_cache_path.empty() && cache_path_.empty()) { @@ -551,10 +556,6 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv force_sequential_engine_build_ = (std::stoi(force_sequential_engine_build_env) == 0 ? false : true); } - const std::string timing_cache_enable_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kTimingCacheEnable); - if (!timing_cache_enable_env.empty()) { - timing_cache_enable_ = (std::stoi(timing_cache_enable_env) == 0 ? 
false : true); - } } // Validate setting @@ -575,7 +576,7 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv dla_core_ = 0; } - if (engine_cache_enable_ || int8_enable_) { + if (engine_cache_enable_ || int8_enable_ || timing_cache_enable_) { if (!cache_path_.empty() && !fs::is_directory(cache_path_)) { if (!fs::create_directory(cache_path_)) { throw std::runtime_error("Failed to create directory " + cache_path_); diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc index 88db8f2cf8849..399cb65829549 100644 --- a/onnxruntime/test/providers/cpu/model_tests.cc +++ b/onnxruntime/test/providers/cpu/model_tests.cc @@ -620,7 +620,7 @@ TEST_P(ModelTest, Run) { params.trt_fp16_enable = 1; ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(TensorrtExecutionProviderWithOptions(¶ms))); } else { - ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(DefaultTensorrtExecutionProvider(¶ms))); + ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(TensorrtExecutionProviderWithOptions(¶ms))); } ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(DefaultCudaExecutionProvider())); } else if (provider_name == "migraphx") { diff --git a/onnxruntime/test/util/include/default_providers.h b/onnxruntime/test/util/include/default_providers.h index 04f25d1990126..980129e95c7c4 100644 --- a/onnxruntime/test/util/include/default_providers.h +++ b/onnxruntime/test/util/include/default_providers.h @@ -39,6 +39,7 @@ std::unique_ptr DefaultNupharExecutionProvider(bool allow_un //std::unique_ptr DefaultStvmExecutionProvider(); std::unique_ptr DefaultTensorrtExecutionProvider(); std::unique_ptr TensorrtExecutionProviderWithOptions(const OrtTensorRTProviderOptions* params); +std::unique_ptr TensorrtExecutionProviderWithOptions(const OrtTensorRTProviderOptionsV2* params); std::unique_ptr DefaultMIGraphXExecutionProvider(); std::unique_ptr MIGraphXExecutionProviderWithOptions(const OrtMIGraphXProviderOptions* params); std::unique_ptr DefaultOpenVINOExecutionProvider(); From 0f378b1353736ecf76baa4fa7fd034d284d3887c Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Sat, 29 Jan 2022 19:05:22 +0000 Subject: [PATCH 15/30] change pool --- .../github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml index ad6a5d2a4d555..e2c65b5121548 100644 --- a/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml @@ -1,6 +1,6 @@ jobs: - job: 'build' - pool: 'onnxruntime-tensorrt8-winbuild' + pool: 'onnxruntime-gpu-tensorrt8-winbuild ' variables: OrtPackageId: 'Microsoft.ML.OnnxRuntime' MsbuildArguments: '-detailedsummary -maxcpucount -consoleloggerparameters:PerformanceSummary' From 72b76457b4a9ffa4dac25f11a71a2f54ab70b831 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Sat, 29 Jan 2022 21:19:55 +0000 Subject: [PATCH 16/30] change back previous pool --- .../github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml index e2c65b5121548..323c8fa4e6e87 100644 --- a/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml +++ 
b/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml @@ -1,6 +1,6 @@ jobs: - job: 'build' - pool: 'onnxruntime-gpu-tensorrt8-winbuild ' + pool: 'onnxruntime-tensorrt8-winbuild ' variables: OrtPackageId: 'Microsoft.ML.OnnxRuntime' MsbuildArguments: '-detailedsummary -maxcpucount -consoleloggerparameters:PerformanceSummary' From 2c5ac28636af26bb1f2ed084e86bc422b7c8b5e5 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Mon, 31 Jan 2022 22:44:38 +0000 Subject: [PATCH 17/30] change path of trt_timing_cache --- onnxruntime/test/providers/cpu/model_tests.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc index 399cb65829549..d455139369ed3 100644 --- a/onnxruntime/test/providers/cpu/model_tests.cc +++ b/onnxruntime/test/providers/cpu/model_tests.cc @@ -607,9 +607,9 @@ TEST_P(ModelTest, Run) { 0, 0, #ifdef _WIN32 - "C:\\local\\trt_timing_cache", // directory where timing caches locate in CI Windows image + "C:\\local\\models\\trt_timing_cache", // directory where timing caches locate in CI Windows image #else - "/data/trt_timing_cache", // directory where timing caches locate in CI Linux image + "/data/models/trt_timing_cache", // directory where timing caches locate in CI Linux image #endif 0, nullptr, From 93e61f0906261bc47409366d5d6ca94cdce44232 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 4 Feb 2022 22:19:02 +0000 Subject: [PATCH 18/30] refactor code --- onnxruntime/test/providers/cpu/model_tests.cc | 11 +++-------- .../azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml | 2 +- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc index d455139369ed3..b7890e1b8bff1 100644 --- a/onnxruntime/test/providers/cpu/model_tests.cc +++ b/onnxruntime/test/providers/cpu/model_tests.cc @@ -606,21 +606,16 @@ TEST_P(ModelTest, Run) { 0, 0, 0, -#ifdef _WIN32 - "C:\\local\\models\\trt_timing_cache", // directory where timing caches locate in CI Windows image -#else - "/data/models/trt_timing_cache", // directory where timing caches locate in CI Linux image -#endif + nullptr, 0, nullptr, 0, - 1 // enable trt timing cache to reduce CI testing time for trt ep - }; + 0}; if (test_case_name.find(ORT_TSTR("FLOAT16")) != std::string::npos) { params.trt_fp16_enable = 1; ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(TensorrtExecutionProviderWithOptions(¶ms))); } else { - ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(TensorrtExecutionProviderWithOptions(¶ms))); + ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(DefaultTensorrtExecutionProvider())); } ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(DefaultCudaExecutionProvider())); } else if (provider_name == "migraphx") { diff --git a/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml index 323c8fa4e6e87..ad6a5d2a4d555 100644 --- a/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml @@ -1,6 +1,6 @@ jobs: - job: 'build' - pool: 'onnxruntime-tensorrt8-winbuild ' + pool: 'onnxruntime-tensorrt8-winbuild' variables: OrtPackageId: 'Microsoft.ML.OnnxRuntime' MsbuildArguments: '-detailedsummary -maxcpucount -consoleloggerparameters:PerformanceSummary' From c9813c2185ece8d8b5ca9d7535fe24a98dfc0f18 Mon Sep 17 00:00:00 2001 
From: Chi Lo Date: Fri, 4 Feb 2022 22:19:52 +0000 Subject: [PATCH 19/30] Add test cases for timing cache --- .../providers/tensorrt/tensorrt_basic_test.cc | 265 ++++++++++++++++++ 1 file changed, 265 insertions(+) diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index c24401a1b89a3..2085d1e3230d9 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ -7,6 +7,11 @@ #include "gtest/gtest.h" #include "test/util/include/default_providers.h" #include "test/util/include/scoped_env_vars.h" +#include "core/providers/tensorrt/tensorrt_provider_options.h" +#include +#include +#include +namespace fs = std::filesystem; using namespace std; using namespace ONNX_NAMESPACE; @@ -15,6 +20,8 @@ using namespace ::onnxruntime::logging; namespace onnxruntime { namespace test { +class TensorrtExecutionProviderCacheTest: public testing::TestWithParam> {}; + template void VerifyOutputs(const std::vector& fetches, const std::vector& expected_dims, const std::vector& expected_values) { @@ -26,6 +33,264 @@ void VerifyOutputs(const std::vector& fetches, const std::vector path, std::basic_string file_extension) { + for (const auto & entry : fs::directory_iterator(path)) { + if (file_extension.compare(fs::path(entry).extension()) == 0) { + return true; + } + } + return false; +} + +void RemoveTensorRTCache(std::basic_string path, std::basic_string file_extension) { + for (const auto & entry : fs::directory_iterator(path)) { + if (file_extension.compare(fs::path(entry).extension()) == 0) { + fs::remove(entry); + } + } +} + +void CreateBaseModel(std::basic_string model_name, std::basic_string graph_name, bool is_dynamic_input_shape, std::vector dims) { + onnxruntime::Model model(graph_name, false, DefaultLoggingManager().DefaultLogger()); + auto& graph = model.MainGraph(); + std::vector inputs; + std::vector outputs; + + // FLOAT tensor + ONNX_NAMESPACE::TypeProto float_tensor; + float_tensor.mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT); + + for (auto dim: dims) { + float_tensor.mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_value(dim); + } + + if (is_dynamic_input_shape) { + float_tensor.mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_param("sym1"); + float_tensor.mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_param("sym2"); + } + + auto& input_arg_1 = graph.GetOrCreateNodeArg("X", &float_tensor); + auto& input_arg_2 = graph.GetOrCreateNodeArg("Y", &float_tensor); + inputs.push_back(&input_arg_1); + inputs.push_back(&input_arg_2); + auto& output_arg = graph.GetOrCreateNodeArg("node_1_out_1", &float_tensor); + outputs.push_back(&output_arg); + graph.AddNode("node_1", "Add", "node 1.", inputs, outputs); + + auto& input_arg_3 = graph.GetOrCreateNodeArg("Z", &float_tensor); + inputs.clear(); + inputs.push_back(&output_arg); + inputs.push_back(&input_arg_3); + auto& output_arg_2 = graph.GetOrCreateNodeArg("M", &float_tensor); + outputs.clear(); + outputs.push_back(&output_arg_2); + graph.AddNode("node_2", "Add", "node 2.", inputs, outputs); + + auto status = graph.Resolve(); + ASSERT_TRUE(status.IsOK()); + status = onnxruntime::Model::Save(model, model_name); +} + +TEST_P(TensorrtExecutionProviderCacheTest, Run) { + // GetParam() consists of two main parameters: + // - cache type (engine cache, profile cache and timing cache) + // - input type (dynamic input shape or static input shape). 
+ // Note: it might have other paramters used for specific situation + std::basic_string param = GetParam(); + std::basic_string input_type = "static"; + std::basic_string engine_info = "enginecache_disable"; // for timigh cache case only + size_t pos = param.find(ORT_TSTR("_")); + ASSERT_NE(pos, std::string::npos); + std::basic_string cache_type = ToUTF8String(param.substr(0, pos)); + if (cache_type.compare("timing") == 0) { + std::basic_string suffix = param.substr(pos + 1); + size_t suffix_pos = suffix.find(ORT_TSTR("_")); + input_type = ToUTF8String(suffix.substr(0, suffix_pos)); + engine_info = suffix.substr(suffix_pos + 1); + } else { + input_type = param.substr(pos + 1); + } + + std::basic_string model_name = "trt_execution_provider_" + cache_type + "caching_test_" + input_type + ".onnx"; + std::vector dims; // static dims + if (input_type.compare("dynamic") == 0) { + dims.push_back(1); + CreateBaseModel(model_name, cache_type + "cachingtest", true, dims); // dynamic input shape + // dims is (1, sym1, sym2) + } + else { + dims.push_back(1); + dims.push_back(3); + dims.push_back(2); + CreateBaseModel(model_name, cache_type + "cachingtest", false, dims); // non-dynamic input shape + // dims is (1, 3, 2) + } + + SessionOptions so; + so.session_logid = "TensorrtExecutionProvider" + cache_type + "cacheTest"; + RunOptions run_options; + run_options.run_tag = so.session_logid; + InferenceSession session_object{so, GetEnvironment()}; + auto allocator_manager = session_object.GetAllocatorManager(); + auto cuda_provider = DefaultCudaExecutionProvider(); + cuda_provider->RegisterAllocator(allocator_manager); + auto cpu_allocator = cuda_provider->GetAllocator(0, OrtMemTypeCPU); + std::vector dims_mul_x = {1, 3, 2}; + std::vector values_mul_x = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; + OrtValue ml_value_x; + CreateMLValue(cpu_allocator, dims_mul_x, values_mul_x, &ml_value_x); + OrtValue ml_value_y; + CreateMLValue(cpu_allocator, dims_mul_x, values_mul_x, &ml_value_y); + OrtValue ml_value_z; + CreateMLValue(cpu_allocator, dims_mul_x, values_mul_x, &ml_value_z); + NameMLValMap feeds; + feeds.insert(std::make_pair("X", ml_value_x)); + feeds.insert(std::make_pair("Y", ml_value_y)); + feeds.insert(std::make_pair("Z", ml_value_z)); + + // prepare outputs + std::vector output_names; + output_names.push_back("M"); + std::vector fetches; + + // prepare expected inputs and outputs + std::vector expected_dims_mul_m = {1, 3, 2}; + std::vector expected_values_mul_m = {3.0f, 6.0f, 9.0f, 12.0f, 15.0f, 18.0f}; + + OrtTensorRTProviderOptionsV2 params{ + 0, + 0, + nullptr, + 1000, + 1, + 1 << 30, + 0, + 0, + nullptr, + 0, + 0, + 0, + 0, + 0, + nullptr, + 0, + nullptr, + 0, + 0}; + + if (cache_type.compare("timing") == 0) { + + // create ort session + params.trt_timing_cache_enable = 1; + if (engine_info.compare("enginecache_enable") == 0) + params.trt_engine_cache_enable = 1; + std::unique_ptr execution_provider = TensorrtExecutionProviderWithOptions(¶ms); + EXPECT_TRUE(session_object.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); + auto status = session_object.Load(model_name); + ASSERT_TRUE(status.IsOK()); + status = session_object.Initialize(); + ASSERT_TRUE(status.IsOK()); + + // run inference + // timing cache should be created under the situation of non-dynamic/dynamic shape input and engine cache enabled/disabled + status = session_object.Run(run_options, feeds, output_names, &fetches); + ASSERT_TRUE(status.IsOK()); + VerifyOutputs(fetches, expected_dims_mul_m, expected_values_mul_m); + 
ASSERT_TRUE(IsTensorRTCacheExisted("./", ".timing")); + RemoveTensorRTCache("./", ".timing"); + + // run inference + // timing cache shoud not be used or created since input shape is not changed and engine won't be re-built + status = session_object.Run(run_options, feeds, output_names, &fetches); + ASSERT_TRUE(status.IsOK()); + VerifyOutputs(fetches, expected_dims_mul_m, expected_values_mul_m); + ASSERT_TRUE(!IsTensorRTCacheExisted("./", ".timing")); + + // create another ort session to test + InferenceSession session_object_2{so, GetEnvironment()}; + execution_provider = TensorrtExecutionProviderWithOptions(¶ms); + EXPECT_TRUE(session_object_2.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); + status = session_object_2.Load(model_name); + ASSERT_TRUE(status.IsOK()); + status = session_object_2.Initialize(); + ASSERT_TRUE(status.IsOK()); + + if (engine_info.compare("enginecache_enable") == 0) { + // engine cache is enabled + + // run inference + // timing cache shoud not be created since engine cache is existed and will be used + status = session_object_2.Run(run_options, feeds, output_names, &fetches); + ASSERT_TRUE(status.IsOK()); + VerifyOutputs(fetches, expected_dims_mul_m, expected_values_mul_m); + ASSERT_TRUE(!IsTensorRTCacheExisted("./", ".timing")); + } else { + // engine cache is not enabled + + // run inference + // timing cache shoud be created + status = session_object_2.Run(run_options, feeds, output_names, &fetches); + ASSERT_TRUE(status.IsOK()); + VerifyOutputs(fetches, expected_dims_mul_m, expected_values_mul_m); + ASSERT_TRUE(IsTensorRTCacheExisted("./", ".timing")); + RemoveTensorRTCache("./", ".timing"); + } + + if (input_type.compare("dynamic") == 0) { + // dynamic input shape + + // inference run with input shape {1, 1, 6} + // timing cache will be created + // TRT engine and profile will be updated + dims_mul_x = {1, 1, 6}; + CreateMLValue(cpu_allocator, dims_mul_x, values_mul_x, &ml_value_x); + CreateMLValue(cpu_allocator, dims_mul_x, values_mul_x, &ml_value_y); + CreateMLValue(cpu_allocator, dims_mul_x, values_mul_x, &ml_value_z); + feeds.clear(); + feeds.insert(std::make_pair("X", ml_value_x)); + feeds.insert(std::make_pair("Y", ml_value_y)); + feeds.insert(std::make_pair("Z", ml_value_z)); + + status = session_object_2.Run(run_options, feeds, output_names, &fetches); + ASSERT_TRUE(status.IsOK()); + VerifyOutputs(fetches, expected_dims_mul_m, expected_values_mul_m); + ASSERT_TRUE(IsTensorRTCacheExisted("./", ".timing")); + } + + // clean up caches for another session + RemoveTensorRTCache("./", ".timing"); + RemoveTensorRTCache("./", ".profile"); + RemoveTensorRTCache("./", ".engine"); + + } else if (cache_type.compare("engine") == 0) { + // #TODO + } else if (cache_type.compare("profile") == 0) { + // #TODO + } +} + +auto ExpandModelName = [](const ::testing::TestParamInfo& info) { + // use info.param here to generate the test suffix + std::basic_string name = info.param; +#ifdef _WIN32 + // Note: The return value of INSTANTIATE_TEST_SUITE_P accpets std::basic_string. + // Need conversion of wchar_t to char. 
+ return std::wstring_convert>().to_bytes(name); +#else + return name; +#endif +}; + +// timing_dynamic_enginecache_enable: timing cache enabled, dynamic input shape and engine cache enable +// timing_dynamic_enginecache_disable: timing cache enabled, dynamic input shape and engine cache disable +// timing_static_enginecache_enable: timing cache enabled, static input shape and engine cache enable +// timing_static_enginecache_disable: timing cache enabled, static input shape and engine cache disable +INSTANTIATE_TEST_SUITE_P(TensorrtExecutionProviderCacheTests, TensorrtExecutionProviderCacheTest, testing::Values("timing_dynamic_enginecache_enable", + "timing_dynamic_enginecache_disable", + "timing_static_enginecache_enable", + "timing_static_enginecache_disable"), + ExpandModelName); + TEST(TensorrtExecutionProviderTest, EngineCachingTest) { ScopedEnvironmentVariables scoped_env_vars{EnvVarMap{ {"ORT_TENSORRT_ENGINE_CACHE_ENABLE", {"1"}}, From e69723e82f1c2a667d6390be2cf901fec5a831da Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 4 Feb 2022 23:07:18 +0000 Subject: [PATCH 20/30] fix bug --- onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index 2085d1e3230d9..4705821f1444a 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ -269,7 +269,7 @@ TEST_P(TensorrtExecutionProviderCacheTest, Run) { } } -auto ExpandModelName = [](const ::testing::TestParamInfo& info) { +auto AddTestName = [](const ::testing::TestParamInfo& info) { // use info.param here to generate the test suffix std::basic_string name = info.param; #ifdef _WIN32 @@ -289,7 +289,7 @@ INSTANTIATE_TEST_SUITE_P(TensorrtExecutionProviderCacheTests, TensorrtExecutionP "timing_dynamic_enginecache_disable", "timing_static_enginecache_enable", "timing_static_enginecache_disable"), - ExpandModelName); + AddTestName); TEST(TensorrtExecutionProviderTest, EngineCachingTest) { ScopedEnvironmentVariables scoped_env_vars{EnvVarMap{ From c371a6dc5e065f000e8bf3c8d6df15210a4992bd Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Sat, 5 Feb 2022 20:35:54 +0000 Subject: [PATCH 21/30] fix bug for CI --- .../providers/tensorrt/tensorrt_basic_test.cc | 64 +++++++++---------- 1 file changed, 29 insertions(+), 35 deletions(-) diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index 4705821f1444a..c9ec5608a8541 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ -20,7 +20,7 @@ using namespace ::onnxruntime::logging; namespace onnxruntime { namespace test { -class TensorrtExecutionProviderCacheTest: public testing::TestWithParam> {}; +class TensorrtExecutionProviderCacheTest: public testing::TestWithParam {}; template void VerifyOutputs(const std::vector& fetches, const std::vector& expected_dims, @@ -33,24 +33,24 @@ void VerifyOutputs(const std::vector& fetches, const std::vector path, std::basic_string file_extension) { +bool IsTensorRTCacheExisted(std::string path, std::string file_extension) { for (const auto & entry : fs::directory_iterator(path)) { - if (file_extension.compare(fs::path(entry).extension()) == 0) { + if (fs::path(file_extension) == fs::path(entry).extension()) { return true; } } return false; } 
-void RemoveTensorRTCache(std::basic_string path, std::basic_string file_extension) { +void RemoveTensorRTCache(std::string path, std::string file_extension) { for (const auto & entry : fs::directory_iterator(path)) { - if (file_extension.compare(fs::path(entry).extension()) == 0) { + if (fs::path(file_extension) == fs::path(entry).extension()) { fs::remove(entry); } } } -void CreateBaseModel(std::basic_string model_name, std::basic_string graph_name, bool is_dynamic_input_shape, std::vector dims) { +void CreateBaseModel(std::string model_name, std::string graph_name, bool is_dynamic_input_shape, std::vector dims) { onnxruntime::Model model(graph_name, false, DefaultLoggingManager().DefaultLogger()); auto& graph = model.MainGraph(); std::vector inputs; @@ -92,26 +92,24 @@ void CreateBaseModel(std::basic_string model_name, std::basic_string< } TEST_P(TensorrtExecutionProviderCacheTest, Run) { - // GetParam() consists of two main parameters: - // - cache type (engine cache, profile cache and timing cache) - // - input type (dynamic input shape or static input shape). - // Note: it might have other paramters used for specific situation - std::basic_string param = GetParam(); - std::basic_string input_type = "static"; - std::basic_string engine_info = "enginecache_disable"; // for timigh cache case only - size_t pos = param.find(ORT_TSTR("_")); + // GetParam() returns the parameter of following format: + // ##cache type##_##input shape type##_##other information if needed## + std::string param = GetParam(); + std::string input_type = "static"; + std::string engine_info = "enginecache_disable"; // for timigh cache case only + size_t pos = param.find("_"); ASSERT_NE(pos, std::string::npos); - std::basic_string cache_type = ToUTF8String(param.substr(0, pos)); + std::string cache_type = ToUTF8String(param.substr(0, pos)); if (cache_type.compare("timing") == 0) { - std::basic_string suffix = param.substr(pos + 1); - size_t suffix_pos = suffix.find(ORT_TSTR("_")); + std::string suffix = param.substr(pos + 1); + size_t suffix_pos = suffix.find("_"); input_type = ToUTF8String(suffix.substr(0, suffix_pos)); engine_info = suffix.substr(suffix_pos + 1); } else { input_type = param.substr(pos + 1); } - std::basic_string model_name = "trt_execution_provider_" + cache_type + "caching_test_" + input_type + ".onnx"; + std::string model_name = "trt_execution_provider_" + cache_type + "caching_test_" + input_type + ".onnx"; std::vector dims; // static dims if (input_type.compare("dynamic") == 0) { dims.push_back(1); @@ -269,27 +267,23 @@ TEST_P(TensorrtExecutionProviderCacheTest, Run) { } } -auto AddTestName = [](const ::testing::TestParamInfo& info) { - // use info.param here to generate the test suffix - std::basic_string name = info.param; -#ifdef _WIN32 - // Note: The return value of INSTANTIATE_TEST_SUITE_P accpets std::basic_string. - // Need conversion of wchar_t to char. - return std::wstring_convert>().to_bytes(name); -#else - return name; -#endif -}; - -// timing_dynamic_enginecache_enable: timing cache enabled, dynamic input shape and engine cache enable -// timing_dynamic_enginecache_disable: timing cache enabled, dynamic input shape and engine cache disable -// timing_static_enginecache_enable: timing cache enabled, static input shape and engine cache enable -// timing_static_enginecache_disable: timing cache enabled, static input shape and engine cache disable +// The TensorrtExecutionProviderCacheTest aims to test the functionality of all the engine/profile/timing caches of ORT TRT. 
+// It uses value-parameterized test and the parameter in the test is a composite parameter which has following format: +// ##cache type##_##input shape type##_##additional provider options if needed## +// - cache type (could be engine cache, profile cache or timing cache) +// - input shape type (could be dynamic input shape or static input shape). +// +// +// We have following test parameters: +// - timing_dynamic_enginecache_enable: timing cache enabled, dynamic input shape and engine cache enable +// - timing_dynamic_enginecache_disable: timing cache enabled, dynamic input shape and engine cache disable +// - timing_static_enginecache_enable: timing cache enabled, static input shape and engine cache enable +// - timing_static_enginecache_disable: timing cache enabled, static input shape and engine cache disable INSTANTIATE_TEST_SUITE_P(TensorrtExecutionProviderCacheTests, TensorrtExecutionProviderCacheTest, testing::Values("timing_dynamic_enginecache_enable", "timing_dynamic_enginecache_disable", "timing_static_enginecache_enable", "timing_static_enginecache_disable"), - AddTestName); + [](const ::testing::TestParamInfo& info) {return info.param;}); TEST(TensorrtExecutionProviderTest, EngineCachingTest) { ScopedEnvironmentVariables scoped_env_vars{EnvVarMap{ From e513740bbb0a61425a6e0d01b288a10278a64203 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Sat, 5 Feb 2022 20:57:49 +0000 Subject: [PATCH 22/30] fix bug --- onnxruntime/test/providers/cpu/model_tests.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc index d557df31357c0..0a72deeb76d48 100644 --- a/onnxruntime/test/providers/cpu/model_tests.cc +++ b/onnxruntime/test/providers/cpu/model_tests.cc @@ -22,7 +22,6 @@ #include "test/onnx/heap_buffer.h" #include "test/onnx/onnx_model_info.h" #include "test/onnx/callback.h" -#include "core/providers/tensorrt/tensorrt_provider_options.h" extern std::unique_ptr ort_env; @@ -600,7 +599,7 @@ TEST_P(ModelTest, Run) { 1000, 1, 1 << 30, - 0, + 1, // enable fp16 0, nullptr, 0, From 4a45c30d4c18cdfc5b9f3e679c0ba8c56c263099 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Sat, 5 Feb 2022 22:58:27 +0000 Subject: [PATCH 23/30] fix bug --- onnxruntime/python/onnxruntime_pybind_state.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index 56c59406bcd56..aa9e7df01bb9b 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -393,6 +393,7 @@ std::unique_ptr CreateExecutionProviderInstance( nullptr, 0, nullptr, + 0, 0}; for (auto option : it->second) { if (option.first == "device_id") { From e38556a26b018df4b241284a5414d1ed847f2cd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20M=C3=BCller?= Date: Wed, 8 Feb 2023 17:50:24 +0100 Subject: [PATCH 24/30] timing cache test --- .../external/onnxruntime_external_deps.cmake | 2 +- .../tensorrt/tensorrt_execution_provider.cc | 26 +- onnxruntime/test/providers/cpu/model_tests.cc | 2 +- .../providers/tensorrt/tensorrt_basic_test.cc | 325 ++++-------------- 4 files changed, 83 insertions(+), 272 deletions(-) diff --git a/cmake/external/onnxruntime_external_deps.cmake b/cmake/external/onnxruntime_external_deps.cmake index 61844b36aa4b3..01ae640748ef0 100644 --- a/cmake/external/onnxruntime_external_deps.cmake +++ b/cmake/external/onnxruntime_external_deps.cmake @@ -455,7 +455,7 @@ if 
(onnxruntime_USE_CUDA) list(APPEND onnxruntime_LINK_DIRS ${onnxruntime_CUDA_HOME}/x64/lib64) else() if(onnxruntime_CUDNN_HOME) - list(APPEND onnxruntime_LINK_DIRS ${onnxruntime_CUDNN_HOME}/lib64) + list(APPEND onnxruntime_LINK_DIRS ${onnxruntime_CUDNN_HOME}/lib ${onnxruntime_CUDNN_HOME}/lib64) endif() list(APPEND onnxruntime_LINK_DIRS ${onnxruntime_CUDA_HOME}/lib64) endif() diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index 9cbe018b3ffd4..82384d208bcc6 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -1483,20 +1483,20 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector(serializedModel->data()), engine_size); } serializedModel->destroy(); - LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Serialized " + engine_cache_path; + LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Serialized engine " + engine_cache_path; + } + // serialize and save timing cache + if (timing_cache_enable_) + { + auto timing_cache = trt_config->getTimingCache(); + std::unique_ptr timingCacheHostData{timing_cache->serialize()}; + if (timingCacheHostData == nullptr) { + return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, + "TensorRT EP could not serialize timing cache: " + timing_cache_path); + } + saveTimingCacheFile(timing_cache_path, timingCacheHostData.get()); + LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Serialized timing cache " + timing_cache_path; } - } - - // serialize and save timing cache - if (timing_cache_enable_) - { - auto timing_cache = trt_config->getTimingCache(); - std::unique_ptr timingCacheHostData{timing_cache->serialize()}; - if (timingCacheHostData == nullptr) { - return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, - "TensorRT EP could not serialize timing cache: " + timing_cache_path); - } - saveTimingCacheFile(timing_cache_path, timingCacheHostData.get()); } } diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc index 206f934a342f7..5adfcda2a38ea 100644 --- a/onnxruntime/test/providers/cpu/model_tests.cc +++ b/onnxruntime/test/providers/cpu/model_tests.cc @@ -687,7 +687,7 @@ TEST_P(ModelTest, Run) { if (test_case_name.find(ORT_TSTR("FLOAT16")) != std::string::npos) { OrtTensorRTProviderOptionsV2 params{0, 0, nullptr, 1000, 1, 1 << 30, 1, // enable fp16 - 0, nullptr, 0, 0, 0, 0, 0, nullptr, 0, nullptr, 0, 0, 0}; + 0, nullptr, 0, 0, 0, 0, 0, nullptr, 0, nullptr, 0, 0, 0, 0}; ASSERT_ORT_STATUS_OK(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT_V2(ortso, ¶ms)); } else { OrtTensorRTProviderOptionsV2* ep_option; diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index 1e8ffe33cd1b8..6c2b051db3541 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ -12,7 +12,7 @@ #include #include #include -namespace fs = std::filesystem; +#include using namespace std; using namespace ONNX_NAMESPACE; @@ -153,6 +153,7 @@ void RunWithOneSessionSingleThreadInference(std::string model_name, std::string nullptr, 0, 0, + 0, 0}; params.trt_engine_cache_enable = 1; @@ -224,6 +225,7 @@ void RunWithOneSessionMultiThreadsInference(std::string model_name, std::string nullptr, 0, 0, + 0, 0}; params.trt_engine_cache_enable = 1; @@ -251,259 +253,6 @@ void RunWithOneSessionMultiThreadsInference(std::string 
model_name, std::string th.join(); } - bool IsTensorRTCacheExisted(std::string path, std::string file_extension) { - for (const auto & entry : fs::directory_iterator(path)) { - if (fs::path(file_extension) == fs::path(entry).extension()) { - return true; - } - } - return false; - } - - void RemoveTensorRTCache(std::string path, std::string file_extension) { - for (const auto & entry : fs::directory_iterator(path)) { - if (fs::path(file_extension) == fs::path(entry).extension()) { - fs::remove(entry); - } - } - } - - void CreateBaseModel(std::string model_name, std::string graph_name, bool is_dynamic_input_shape, std::vector dims) { - onnxruntime::Model model(graph_name, false, DefaultLoggingManager().DefaultLogger()); - auto& graph = model.MainGraph(); - std::vector inputs; - std::vector outputs; - - // FLOAT tensor - ONNX_NAMESPACE::TypeProto float_tensor; - float_tensor.mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT); - - for (auto dim: dims) { - float_tensor.mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_value(dim); - } - - if (is_dynamic_input_shape) { - float_tensor.mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_param("sym1"); - float_tensor.mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_param("sym2"); - } - - auto& input_arg_1 = graph.GetOrCreateNodeArg("X", &float_tensor); - auto& input_arg_2 = graph.GetOrCreateNodeArg("Y", &float_tensor); - inputs.push_back(&input_arg_1); - inputs.push_back(&input_arg_2); - auto& output_arg = graph.GetOrCreateNodeArg("node_1_out_1", &float_tensor); - outputs.push_back(&output_arg); - graph.AddNode("node_1", "Add", "node 1.", inputs, outputs); - - auto& input_arg_3 = graph.GetOrCreateNodeArg("Z", &float_tensor); - inputs.clear(); - inputs.push_back(&output_arg); - inputs.push_back(&input_arg_3); - auto& output_arg_2 = graph.GetOrCreateNodeArg("M", &float_tensor); - outputs.clear(); - outputs.push_back(&output_arg_2); - graph.AddNode("node_2", "Add", "node 2.", inputs, outputs); - - auto status = graph.Resolve(); - ASSERT_TRUE(status.IsOK()); - status = onnxruntime::Model::Save(model, model_name); - } - - TEST_P(TensorrtExecutionProviderCacheTest, Run) { - // GetParam() returns the parameter of following format: - // ##cache type##_##input shape type##_##other information if needed## - std::string param = GetParam(); - std::string input_type = "static"; - std::string engine_info = "enginecache_disable"; // for timigh cache case only - size_t pos = param.find("_"); - ASSERT_NE(pos, std::string::npos); - std::string cache_type = ToUTF8String(param.substr(0, pos)); - if (cache_type.compare("timing") == 0) { - std::string suffix = param.substr(pos + 1); - size_t suffix_pos = suffix.find("_"); - input_type = ToUTF8String(suffix.substr(0, suffix_pos)); - engine_info = suffix.substr(suffix_pos + 1); -} else { - input_type = param.substr(pos + 1); -} - -std::string model_name = "trt_execution_provider_" + cache_type + "caching_test_" + input_type + ".onnx"; -std::vector dims; // static dims -if (input_type.compare("dynamic") == 0) { -dims.push_back(1); -CreateBaseModel(model_name, cache_type + "cachingtest", true, dims); // dynamic input shape -// dims is (1, sym1, sym2) -} -else { -dims.push_back(1); -dims.push_back(3); -dims.push_back(2); -CreateBaseModel(model_name, cache_type + "cachingtest", false, dims); // non-dynamic input shape -// dims is (1, 3, 2) -} - -SessionOptions so; -so.session_logid = "TensorrtExecutionProvider" + cache_type + "cacheTest"; -RunOptions run_options; 
-run_options.run_tag = so.session_logid; -InferenceSession session_object{so, GetEnvironment()}; -auto allocator_manager = session_object.GetAllocatorManager(); -auto cuda_provider = DefaultCudaExecutionProvider(); -cuda_provider->RegisterAllocator(allocator_manager); -auto cpu_allocator = cuda_provider->GetAllocator(0, OrtMemTypeCPU); -std::vector dims_mul_x = {1, 3, 2}; -std::vector values_mul_x = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; -OrtValue ml_value_x; -CreateMLValue(cpu_allocator, dims_mul_x, values_mul_x, &ml_value_x); -OrtValue ml_value_y; -CreateMLValue(cpu_allocator, dims_mul_x, values_mul_x, &ml_value_y); -OrtValue ml_value_z; -CreateMLValue(cpu_allocator, dims_mul_x, values_mul_x, &ml_value_z); -NameMLValMap feeds; -feeds.insert(std::make_pair("X", ml_value_x)); -feeds.insert(std::make_pair("Y", ml_value_y)); -feeds.insert(std::make_pair("Z", ml_value_z)); - -// prepare outputs -std::vector output_names; -output_names.push_back("M"); -std::vector fetches; - -// prepare expected inputs and outputs -std::vector expected_dims_mul_m = {1, 3, 2}; -std::vector expected_values_mul_m = {3.0f, 6.0f, 9.0f, 12.0f, 15.0f, 18.0f}; - -OrtTensorRTProviderOptionsV2 params{ - 0, - 0, - nullptr, - 1000, - 1, - 1 << 30, - 0, - 0, - nullptr, - 0, - 0, - 0, - 0, - 0, - nullptr, - 0, - nullptr, - 0, - 0}; - -if (cache_type.compare("timing") == 0) { - -// create ort session -params.trt_timing_cache_enable = 1; -if (engine_info.compare("enginecache_enable") == 0) -params.trt_engine_cache_enable = 1; -std::unique_ptr execution_provider = TensorrtExecutionProviderWithOptions(¶ms); -EXPECT_TRUE(session_object.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); -auto status = session_object.Load(model_name); -ASSERT_TRUE(status.IsOK()); -status = session_object.Initialize(); -ASSERT_TRUE(status.IsOK()); - -// run inference -// timing cache should be created under the situation of non-dynamic/dynamic shape input and engine cache enabled/disabled -status = session_object.Run(run_options, feeds, output_names, &fetches); -ASSERT_TRUE(status.IsOK()); -VerifyOutputs(fetches, expected_dims_mul_m, expected_values_mul_m); -ASSERT_TRUE(IsTensorRTCacheExisted("./", ".timing")); -RemoveTensorRTCache("./", ".timing"); - -// run inference -// timing cache shoud not be used or created since input shape is not changed and engine won't be re-built -status = session_object.Run(run_options, feeds, output_names, &fetches); -ASSERT_TRUE(status.IsOK()); -VerifyOutputs(fetches, expected_dims_mul_m, expected_values_mul_m); -ASSERT_TRUE(!IsTensorRTCacheExisted("./", ".timing")); - -// create another ort session to test -InferenceSession session_object_2{so, GetEnvironment()}; -execution_provider = TensorrtExecutionProviderWithOptions(¶ms); -EXPECT_TRUE(session_object_2.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); -status = session_object_2.Load(model_name); -ASSERT_TRUE(status.IsOK()); -status = session_object_2.Initialize(); -ASSERT_TRUE(status.IsOK()); - -if (engine_info.compare("enginecache_enable") == 0) { -// engine cache is enabled - -// run inference -// timing cache shoud not be created since engine cache is existed and will be used -status = session_object_2.Run(run_options, feeds, output_names, &fetches); -ASSERT_TRUE(status.IsOK()); -VerifyOutputs(fetches, expected_dims_mul_m, expected_values_mul_m); -ASSERT_TRUE(!IsTensorRTCacheExisted("./", ".timing")); -} else { -// engine cache is not enabled - -// run inference -// timing cache shoud be created -status = 
session_object_2.Run(run_options, feeds, output_names, &fetches); -ASSERT_TRUE(status.IsOK()); -VerifyOutputs(fetches, expected_dims_mul_m, expected_values_mul_m); -ASSERT_TRUE(IsTensorRTCacheExisted("./", ".timing")); -RemoveTensorRTCache("./", ".timing"); -} - -if (input_type.compare("dynamic") == 0) { -// dynamic input shape - -// inference run with input shape {1, 1, 6} -// timing cache will be created -// TRT engine and profile will be updated -dims_mul_x = {1, 1, 6}; -CreateMLValue(cpu_allocator, dims_mul_x, values_mul_x, &ml_value_x); -CreateMLValue(cpu_allocator, dims_mul_x, values_mul_x, &ml_value_y); -CreateMLValue(cpu_allocator, dims_mul_x, values_mul_x, &ml_value_z); -feeds.clear(); -feeds.insert(std::make_pair("X", ml_value_x)); -feeds.insert(std::make_pair("Y", ml_value_y)); -feeds.insert(std::make_pair("Z", ml_value_z)); - -status = session_object_2.Run(run_options, feeds, output_names, &fetches); -ASSERT_TRUE(status.IsOK()); -VerifyOutputs(fetches, expected_dims_mul_m, expected_values_mul_m); -ASSERT_TRUE(IsTensorRTCacheExisted("./", ".timing")); -} - -// clean up caches for another session -RemoveTensorRTCache("./", ".timing"); -RemoveTensorRTCache("./", ".profile"); -RemoveTensorRTCache("./", ".engine"); - -} else if (cache_type.compare("engine") == 0) { -// #TODO -} else if (cache_type.compare("profile") == 0) { -// #TODO -} -} - -// The TensorrtExecutionProviderCacheTest aims to test the functionality of all the engine/profile/timing caches of ORT TRT. -// It uses value-parameterized test and the parameter in the test is a composite parameter which has following format: -// ##cache type##_##input shape type##_##additional provider options if needed## -// - cache type (could be engine cache, profile cache or timing cache) -// - input shape type (could be dynamic input shape or static input shape). 
-// -// -// We have following test parameters: -// - timing_dynamic_enginecache_enable: timing cache enabled, dynamic input shape and engine cache enable -// - timing_dynamic_enginecache_disable: timing cache enabled, dynamic input shape and engine cache disable -// - timing_static_enginecache_enable: timing cache enabled, static input shape and engine cache enable -// - timing_static_enginecache_disable: timing cache enabled, static input shape and engine cache disable -INSTANTIATE_TEST_SUITE_P(TensorrtExecutionProviderCacheTests, TensorrtExecutionProviderCacheTest, testing::Values("timing_dynamic_enginecache_enable", - "timing_dynamic_enginecache_disable", - "timing_static_enginecache_enable", - "timing_static_enginecache_disable"), -[](const ::testing::TestParamInfo& info) {return info.param;}); - - TEST(TensorrtExecutionProviderTest, SessionCreationWithMultiThreadsAndInferenceWithMultiThreads) { std::vector threads; std::string model_name = "trt_execution_provider_multithreading_test.onnx"; @@ -680,6 +429,7 @@ TEST_P(TensorrtExecutionProviderCacheTest, Run) { nullptr, 0, 0, + 0, 0}; if (cache_type.compare("engine") == 0) { @@ -792,9 +542,68 @@ TEST_P(TensorrtExecutionProviderCacheTest, Run) { } } else if (cache_type.compare("timing") == 0) { // add test code here + + /* Following code block tests the functionality of engine and optimization profile of ORT TRT, including: + * - timing cache cache serialization/de-serialization + * - benefir of usign a timing cache no matter if dynamic / static input + */ + uint64_t compilation_without_cache_ms, compilation_with_cache_ms; + + params.trt_timing_cache_enable = 1; + // std::chrono + { + auto start = chrono::steady_clock::now(); + std::unique_ptr execution_provider = TensorrtExecutionProviderWithOptions(¶ms); + EXPECT_TRUE(session_object.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); + auto status = session_object.Load(model_name); + ASSERT_TRUE(status.IsOK()); + status = session_object.Initialize(); + ASSERT_TRUE(status.IsOK()); + + // run inference + // TRT timing cache should be created under the situation of non-dynamic/dynamic shape input + status = session_object.Run(run_options, feeds, output_names, &fetches); + auto end = chrono::steady_clock::now(); + ASSERT_TRUE(status.IsOK()); + VerifyOutputs(fetches, expected_dims_mul_m, expected_values_mul_m); + ASSERT_TRUE(IsCacheExistedByType("./", ".timing")); + compilation_without_cache_ms = chrono::duration_cast(end - start).count(); + } + + // get new session and reinitialize model + // second same inference should resuse the cache and therefore have a faster build + if (input_type.compare("static") == 0) { + { + InferenceSession session_object_new{so, GetEnvironment()}; + { + auto start = chrono::steady_clock::now(); + std::unique_ptr execution_provider = TensorrtExecutionProviderWithOptions(¶ms); + EXPECT_TRUE(session_object_new.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); + auto status = session_object_new.Load(model_name); + ASSERT_TRUE(status.IsOK()); + status = session_object_new.Initialize(); + ASSERT_TRUE(status.IsOK()); + + // run inference + // TRT timing cache should be created under the situation of non-dynamic/dynamic shape input + status = session_object_new.Run(run_options, feeds, output_names, &fetches); + // TODO narrow down actual compilation section + auto end = chrono::steady_clock::now(); + + ASSERT_TRUE(status.IsOK()); + VerifyOutputs(fetches, expected_dims_mul_m, expected_values_mul_m); + ASSERT_TRUE(IsCacheExistedByType("./", 
".timing")); + compilation_with_cache_ms = chrono::duration_cast(end - start).count(); + } + } + ASSERT_TRUE(compilation_with_cache_ms <= compilation_without_cache_ms); + } else { + // TODO test dynamic shapes + } } // clean up caches + RemoveCachesByType("./", ".timing"); RemoveCachesByType("./", ".engine"); RemoveCachesByType("./", ".profile"); } @@ -809,11 +618,13 @@ TEST_P(TensorrtExecutionProviderCacheTest, Run) { * We have following test parameters: * - engine_static: engine cache enabled with non-dynamic input shape * - engine_dynamic: engine cache enabled with dynamic input shape - * - timing_static: will be added - * - timing_dynamic: will be added + * - timing_static: timing cache enabled, static input shape + * - timing_dynamic: timing cache enabled, static input shape */ INSTANTIATE_TEST_SUITE_P(TensorrtExecutionProviderCacheTests, TensorrtExecutionProviderCacheTest, testing::Values("engine_static", - "engine_dynamic"), + "engine_dynamic", + "timing_static", + "timing_dynamic"), [](const ::testing::TestParamInfo& info) {return info.param;}); TEST(TensorrtExecutionProviderTest, FunctionTest) { From bf0b880081c45224e0bd7eb20c3cd20179e8d55c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20M=C3=BCller?= Date: Wed, 22 Feb 2023 11:50:00 +0100 Subject: [PATCH 25/30] append compute capability to cache and add force option --- .../tensorrt/tensorrt_provider_options.h | 3 +- .../core/session/onnxruntime_c_api.h | 4 ++- .../tensorrt/tensorrt_execution_provider.cc | 29 ++++++++++++++++--- .../tensorrt/tensorrt_execution_provider.h | 2 ++ .../tensorrt_execution_provider_info.cc | 10 ++++--- .../tensorrt_execution_provider_info.h | 5 ++-- .../tensorrt_execution_provider_utils.h | 13 ++++++++- .../tensorrt/tensorrt_provider_factory.cc | 2 ++ .../core/session/provider_bridge_ort.cc | 2 ++ onnxruntime/test/perftest/ort_test_session.cc | 2 ++ onnxruntime/test/providers/cpu/model_tests.cc | 2 +- .../providers/tensorrt/tensorrt_basic_test.cc | 3 ++ onnxruntime/test/util/default_providers.cc | 2 ++ 13 files changed, 65 insertions(+), 14 deletions(-) diff --git a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h index 89b47daaeecf8..d3d35b9cdd3fa 100644 --- a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h +++ b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h @@ -5,7 +5,7 @@ /// /// Options for the TensorRT provider that are passed to SessionOptionsAppendExecutionProvider_TensorRT_V2. -/// Please note that this struct is *similar* to OrtTensorRTProviderOptions but only to be used internally. +/// Please note that this struct is *similar* to OrtTensorRTProviderOptions but only to be used internally. /// Going forward, new trt provider options are to be supported via this struct and usage of the publicly defined /// OrtTensorRTProviderOptions will be deprecated over time. /// User can only get the instance of OrtTensorRTProviderOptionsV2 via CreateTensorRTProviderOptions. @@ -32,4 +32,5 @@ struct OrtTensorRTProviderOptionsV2 { int trt_context_memory_sharing_enable; // enable context memory sharing between subgraphs. Default 0 = false, nonzero = true int trt_layer_norm_fp32_fallback; // force Pow + Reduce ops in layer norm to FP32. Default 0 = false, nonzero = true int trt_timing_cache_enable; // enable TensorRT timing cache. 
Default 0 = false, nonzero = true + int trt_force_timing_cache; // force the TensorRT cache to be used even if device profile does not match. Default 0 = false, nonzero = true }; diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index 44c41e5d1f587..54f849d68fbac 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -528,6 +528,8 @@ typedef struct OrtTensorRTProviderOptions { int trt_dla_core; // DLA core number. Default 0 int trt_dump_subgraphs; // dump TRT subgraph. Default 0 = false, nonzero = true int trt_engine_cache_enable; // enable engine caching. Default 0 = false, nonzero = true + int trt_timing_cache_enable; // enable TensorRT timing cache. Default 0 = false, nonzero = true + int trt_force_timing_cache; // force the TensorRT cache to be used even if device profile does not match. Default 0 = false, nonzero = true const char* trt_engine_cache_path; // specify engine cache path int trt_engine_decryption_enable; // enable engine decryption. Default 0 = false, nonzero = true const char* trt_engine_decryption_lib_path; // specify engine decryption library path @@ -3632,7 +3634,7 @@ struct OrtApi { * 2. For windows, ort will infer the group id from a logical processor id, for example, assuming there are two groups with each has 64 logical processors, * an id of 64 will be inferred as the last processor of the 1st group, while 65 will be interpreted as the 1st processor of the second group. * Hence 64-65 is an invalid configuration, because a windows thread cannot be attached to processors across group boundary. - * + * * \since Version 1.14 */ ORT_API2_STATUS(SetGlobalIntraOpThreadAffinity, _Inout_ OrtThreadingOptions* tp_options, const char* affinity_string); diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index 82384d208bcc6..3078a7b608e80 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -422,6 +422,11 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv timing_cache_enable_ = (std::stoi(timing_cache_enable_env) == 0 ? false : true); } + const std::string timing_force_match_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kForceTimingCache); + if (!timing_force_match_env.empty()) { + force_timing_cache_match_ = (std::stoi(timing_force_match_env) == 0 ? 
false : true); + } + if (engine_cache_enable_ || int8_enable_ || timing_cache_enable_) { const std::string engine_cache_path = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kEngineCachePath); cache_path_ = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kCachePath); @@ -1404,7 +1409,9 @@ common::Status TensorrtExecutionProvider::Compile(const std::vectorsetTimingCache(*timing_cache, false); + trt_config->setTimingCache(*timing_cache, force_timing_cache_match_); + LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Deserialized timing cache from " + timing_cache_path; } // Build engine + auto engine_build_start = std::chrono::steady_clock::now(); trt_engine = tensorrt_ptr::unique_pointer(trt_builder->buildEngineWithConfig(*trt_network, *trt_config)); if (trt_engine == nullptr) { return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "TensorRT EP could not build engine for fused node: " + fused_node.Name()); } + auto engine_build_stop = std::chrono::steady_clock::now(); + LOGS_DEFAULT(INFO) << "TensorRT engine build for " << trt_node_name_with_precision << " took: " << + std::chrono::duration_cast(engine_build_stop - engine_build_start).count() << "ms" << std::endl; + if (engine_cache_enable_) { nvinfer1::IHostMemory* serializedModel = trt_engine->serialize(); size_t engine_size = serializedModel->size(); @@ -1602,7 +1615,9 @@ common::Status TensorrtExecutionProvider::Compile(const std::vectorengine_cache_path, trt_state->trt_node_name_with_precision); const std::string engine_cache_path = cache_path + ".engine"; const std::string profile_cache_path = cache_path + ".profile"; - const std::string timing_cache_path = cache_path + ".timing"; + cudaDeviceProp prop; + CUDA_CALL_THROW(cudaGetDeviceProperties(&prop, 0)); + const std::string timing_cache_path = GetTimingCachePath(trt_state->engine_cache_path, prop); if (trt_state->engine_cache_enable && trt_engine == nullptr) { std::ifstream engine_file(engine_cache_path, std::ios::binary | std::ios::in); std::ifstream profile_file(profile_cache_path, std::ios::binary | std::ios::in); @@ -1846,14 +1861,20 @@ common::Status TensorrtExecutionProvider::Compile(const std::vectorsetTimingCache(*timing_cache, false); + trt_config->setTimingCache(*timing_cache, force_timing_cache_match_); + LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Deserialized timing cache from " + timing_cache_path; } // Build engine { auto lock = GetApiLock(); + auto engine_build_start = std::chrono::steady_clock::now(); *(trt_state->engine) = tensorrt_ptr::unique_pointer( trt_builder->buildEngineWithConfig(*trt_state->network->get(), *trt_config)); + auto engine_build_stop = std::chrono::steady_clock::now(); + LOGS_DEFAULT(INFO) << "TensorRT engine build for " << trt_state->trt_node_name_with_precision << " took: " << + std::chrono::duration_cast(engine_build_stop - engine_build_start).count() << "ms" << std::endl; + } if (trt_state->engine == nullptr) { return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "TensorRT EP Failed to Build Engine."); diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h index 5570061c23f0b..4c16de27bd94a 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h @@ -31,6 +31,7 @@ static const std::string kForceSequentialEngineBuild= "ORT_TENSORRT_FORCE_SEQUEN static const std::string kContextMemorySharingEnable= "ORT_TENSORRT_CONTEXT_MEMORY_SHARING_ENABLE"; static const std::string 
kLayerNormFP32Fallback= "ORT_TENSORRT_LAYER_NORM_FP32_FALLBACK"; static const std::string kTimingCacheEnable = "ORT_TENSORRT_TIMING_CACHE_ENABLE"; +static const std::string kForceTimingCache = "ORT_TENSORRT_FORCE_TIMING_CACHE_ENABLE"; // Old env variable for backward compatibility static const std::string kEngineCachePath = "ORT_TENSORRT_ENGINE_CACHE_PATH"; } // namespace tensorrt_env_vars @@ -179,6 +180,7 @@ class TensorrtExecutionProvider : public IExecutionProvider { int (*engine_decryption_)(const char*, char*, size_t*); int (*engine_encryption_)(const char*, char*, size_t); bool timing_cache_enable_ = false; + bool force_timing_cache_match_ = false; std::unordered_set control_flow_op_set_ = {"If", "Loop", "Scan"}; std::unordered_map> parsers_; diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc index 9bc1997e44371..c79ddf5bcc985 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc @@ -27,12 +27,13 @@ constexpr const char* kCachePath = "trt_engine_cache_path"; constexpr const char* kDecryptionEnable = "trt_engine_decryption_enable"; constexpr const char* kDecryptionLibPath = "trt_engine_decryption_lib_path"; constexpr const char* kForceSequentialEngineBuild = "trt_force_sequential_engine_build"; -// add new provider option name here. +// add new provider option name here. constexpr const char* kContextMemorySharingEnable = "trt_context_memory_sharing_enable"; constexpr const char* kLayerNormFP32Fallback = "trt_layer_norm_fp32_fallback"; constexpr const char* kTimingCacheEnable = "trt_timing_cache_enable"; +constexpr const char* kForceTimingCacheMatch = "trt_force_timing_cache_match"; } // namespace provider_option_names -} // namespace tensorrt +} // namespace tensorrt TensorrtExecutionProviderInfo TensorrtExecutionProviderInfo::FromProviderOptions(const ProviderOptions& options) { TensorrtExecutionProviderInfo info{}; @@ -58,16 +59,17 @@ TensorrtExecutionProviderInfo TensorrtExecutionProviderInfo::FromProviderOptions .AddAssignmentToReference(tensorrt::provider_option_names::kInt8CalibTable, info.int8_calibration_table_name) .AddAssignmentToReference(tensorrt::provider_option_names::kInt8UseNativeCalibTable, info.int8_use_native_calibration_table) .AddAssignmentToReference(tensorrt::provider_option_names::kDLAEnable, info.dla_enable) - .AddAssignmentToReference(tensorrt::provider_option_names::kDLACore, info.dla_core) + .AddAssignmentToReference(tensorrt::provider_option_names::kDLACore, info.dla_core) .AddAssignmentToReference(tensorrt::provider_option_names::kDumpSubgraphs, info.dump_subgraphs) .AddAssignmentToReference(tensorrt::provider_option_names::kEngineCacheEnable, info.engine_cache_enable) .AddAssignmentToReference(tensorrt::provider_option_names::kCachePath, info.engine_cache_path) .AddAssignmentToReference(tensorrt::provider_option_names::kDecryptionEnable, info.engine_decryption_enable) - .AddAssignmentToReference(tensorrt::provider_option_names::kDecryptionLibPath, info.engine_decryption_lib_path) + .AddAssignmentToReference(tensorrt::provider_option_names::kDecryptionLibPath, info.engine_decryption_lib_path) .AddAssignmentToReference(tensorrt::provider_option_names::kForceSequentialEngineBuild, info.force_sequential_engine_build) .AddAssignmentToReference(tensorrt::provider_option_names::kContextMemorySharingEnable, info.context_memory_sharing_enable) 
.AddAssignmentToReference(tensorrt::provider_option_names::kLayerNormFP32Fallback, info.layer_norm_fp32_fallback) .AddAssignmentToReference(tensorrt::provider_option_names::kTimingCacheEnable, info.timing_cache_enable) + .AddAssignmentToReference(tensorrt::provider_option_names::kForceTimingCacheMatch, info.force_timing_cache) .Parse(options)); // add new provider option here. return info; diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h index 2d4b8bd6df81f..bacc94795d69e 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h @@ -17,10 +17,10 @@ struct TensorrtExecutionProviderInfo { void* user_compute_stream{nullptr}; bool has_trt_options{false}; int max_partition_iterations{1000}; - int min_subgraph_size{1}; + int min_subgraph_size{1}; size_t max_workspace_size{1 << 30}; bool fp16_enable{false}; - bool int8_enable{false}; + bool int8_enable{false}; std::string int8_calibration_table_name{""}; bool int8_use_native_calibration_table{false}; bool dla_enable{false}; @@ -34,6 +34,7 @@ struct TensorrtExecutionProviderInfo { bool context_memory_sharing_enable{false}; bool layer_norm_fp32_fallback{false}; bool timing_cache_enable{false}; + bool force_timing_cache{false}; static TensorrtExecutionProviderInfo FromProviderOptions(const ProviderOptions& options); static ProviderOptions ToProviderOptions(const TensorrtExecutionProviderInfo& info); diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_utils.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_utils.h index a04b971c6a7b9..d28e3d743df0e 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_utils.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_utils.h @@ -167,10 +167,21 @@ std::string GetCachePath(const std::string& root, const std::string& name) { } } +/* + * Get Timing by compute capability + * + */ +std::string GetTimingCachePath(const std::string& root, cudaDeviceProp prop) { + // append compute capability of the GPU as this invalidates the cache and TRT will throw when loading the cache + const std::string timing_cache_name = "TensorrtExecutionProvider_cache_cc" + + std::to_string(prop.major * 10 + prop.minor) + ".timing"; + return GetCachePath(root, timing_cache_name); +} + /* * Get cache by type * - * \param root root path of the cache + * \param root root path of the cache * \param file_extension It could be ".engine", ".profile" or ".timing" */ std::vector GetCachesByType(const std::string& root, std::string file_extension) { diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc index 8b7e3c55ea396..e86730a7e58dd 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc @@ -71,6 +71,7 @@ struct Tensorrt_Provider : Provider { info.context_memory_sharing_enable = options.trt_context_memory_sharing_enable != 0; info.layer_norm_fp32_fallback = options.trt_layer_norm_fp32_fallback != 0; info.timing_cache_enable = options.trt_timing_cache_enable != 0; + info.force_timing_cache = options.trt_force_timing_cache != 0; return std::make_shared(info); } @@ -139,6 +140,7 @@ struct Tensorrt_Provider : Provider { trt_options.trt_context_memory_sharing_enable = 
internal_options.context_memory_sharing_enable; trt_options.trt_layer_norm_fp32_fallback = internal_options.layer_norm_fp32_fallback; trt_options.trt_timing_cache_enable = internal_options.timing_cache_enable; + trt_options.trt_force_timing_cache = internal_options.force_timing_cache; } ProviderOptions GetProviderOptions(const void* provider_options) override { diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 9795b34f359b3..ba97f75b624aa 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1266,6 +1266,7 @@ OrtTensorRTProviderOptionsV2 OrtTensorRTProviderOptionsToOrtTensorRTProviderOpti // Add new provider option below // Use default value as this field is not available in OrtTensorRTProviderOptions trt_options_converted.trt_timing_cache_enable = 0; + trt_options_converted.trt_force_timing_cache = 0; trt_options_converted.trt_context_memory_sharing_enable = 0; trt_options_converted.trt_layer_norm_fp32_fallback = 0; return trt_options_converted; @@ -1575,6 +1576,7 @@ ORT_API_STATUS_IMPL(OrtApis::CreateTensorRTProviderOptions, _Outptr_ OrtTensorRT (*out)->trt_context_memory_sharing_enable = false; (*out)->trt_layer_norm_fp32_fallback = false; (*out)->trt_timing_cache_enable = false; + (*out)->trt_force_timing_cache = false; return nullptr; #else ORT_UNUSED_PARAMETER(out); diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index c624a83d3d925..22ba61412b8d0 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -76,6 +76,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device bool trt_context_memory_sharing_enable = false; bool trt_layer_norm_fp32_fallback = false; bool trt_timing_cache_enable = false; + bool trt_force_timing_cache = false; #ifdef _MSC_VER std::string ov_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string); @@ -257,6 +258,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device tensorrt_options.trt_context_memory_sharing_enable = trt_context_memory_sharing_enable; tensorrt_options.trt_layer_norm_fp32_fallback = trt_layer_norm_fp32_fallback; tensorrt_options.trt_timing_cache_enable = trt_timing_cache_enable; + tensorrt_options.trt_force_timing_cache = trt_force_timing_cache; session_options.AppendExecutionProvider_TensorRT_V2(tensorrt_options); OrtCUDAProviderOptions cuda_options; diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc index 5adfcda2a38ea..fe93e2fc75255 100644 --- a/onnxruntime/test/providers/cpu/model_tests.cc +++ b/onnxruntime/test/providers/cpu/model_tests.cc @@ -687,7 +687,7 @@ TEST_P(ModelTest, Run) { if (test_case_name.find(ORT_TSTR("FLOAT16")) != std::string::npos) { OrtTensorRTProviderOptionsV2 params{0, 0, nullptr, 1000, 1, 1 << 30, 1, // enable fp16 - 0, nullptr, 0, 0, 0, 0, 0, nullptr, 0, nullptr, 0, 0, 0, 0}; + 0, nullptr, 0, 0, 0, 0, 0, nullptr, 0, nullptr, 0, 0, 0, 0, 0}; ASSERT_ORT_STATUS_OK(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT_V2(ortso, ¶ms)); } else { OrtTensorRTProviderOptionsV2* ep_option; diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index 6c2b051db3541..31b538e4a6d02 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ 
b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ -154,6 +154,7 @@ void RunWithOneSessionSingleThreadInference(std::string model_name, std::string 0, 0, 0, + 0, 0}; params.trt_engine_cache_enable = 1; @@ -226,6 +227,7 @@ void RunWithOneSessionMultiThreadsInference(std::string model_name, std::string 0, 0, 0, + 0, 0}; params.trt_engine_cache_enable = 1; @@ -430,6 +432,7 @@ TEST_P(TensorrtExecutionProviderCacheTest, Run) { 0, 0, 0, + 0, 0}; if (cache_type.compare("engine") == 0) { diff --git a/onnxruntime/test/util/default_providers.cc b/onnxruntime/test/util/default_providers.cc index 044c037365d63..869b002279f8f 100644 --- a/onnxruntime/test/util/default_providers.cc +++ b/onnxruntime/test/util/default_providers.cc @@ -39,6 +39,8 @@ std::unique_ptr DefaultTensorrtExecutionProvider() { 0, 0, 0, + 0, + 0, nullptr, 0, nullptr, From 244b437ed11dfd9f0029d1e24a7580a6baadf00e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20M=C3=BCller?= Date: Wed, 22 Feb 2023 11:50:40 +0100 Subject: [PATCH 26/30] Take timing of first warm up inference --- onnxruntime/test/perftest/performance_runner.cc | 5 +++++ onnxruntime/test/perftest/performance_runner.h | 1 + 2 files changed, 6 insertions(+) diff --git a/onnxruntime/test/perftest/performance_runner.cc b/onnxruntime/test/perftest/performance_runner.cc index 2039c65b53aa6..b27ded96d85a9 100644 --- a/onnxruntime/test/perftest/performance_runner.cc +++ b/onnxruntime/test/perftest/performance_runner.cc @@ -114,7 +114,9 @@ Status PerformanceRunner::Run() { } // warm up + initial_inference_result_.start = std::chrono::high_resolution_clock::now(); ORT_RETURN_IF_ERROR(RunOneIteration()); + initial_inference_result_.end = std::chrono::high_resolution_clock::now(); // TODO: start profiling // if (!performance_test_config_.run_config.profile_file.empty()) @@ -139,9 +141,12 @@ Status PerformanceRunner::Run() { std::chrono::duration session_create_duration = session_create_end_ - session_create_start_; // TODO: end profiling // if (!performance_test_config_.run_config.profile_file.empty()) session_object->EndProfiling(); + auto first_inference_duration = + std::chrono::duration_cast(initial_inference_result_.end - initial_inference_result_.start).count(); std::chrono::duration inference_duration = performance_result_.end - performance_result_.start; std::cout << "Session creation time cost: " << session_create_duration.count() << " s\n" + << "First inference time cost: " << first_inference_duration << " ms\n" << "Total inference time cost: " << performance_result_.total_time_cost << " s\n" // sum of time taken by each request << "Total inference requests: " << performance_result_.time_costs.size() << "\n" << "Average inference time cost: " << performance_result_.total_time_cost / performance_result_.time_costs.size() * 1000 << " ms\n" diff --git a/onnxruntime/test/perftest/performance_runner.h b/onnxruntime/test/perftest/performance_runner.h index aae68fd2d289f..da2df9c39f44c 100644 --- a/onnxruntime/test/perftest/performance_runner.h +++ b/onnxruntime/test/perftest/performance_runner.h @@ -106,6 +106,7 @@ class PerformanceRunner { private: std::chrono::time_point session_create_start_; std::chrono::time_point session_create_end_; + PerformanceResult initial_inference_result_; PerformanceResult performance_result_; PerformanceTestConfig performance_test_config_; std::unique_ptr test_model_info_; From 5db55ff16132c1e2c3b2d4dc353ff8e7f0ea761d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20M=C3=BCller?= Date: Mon, 27 Feb 2023 15:17:39 +0100 
Subject: [PATCH 27/30] detailed build log option --- .../tensorrt/tensorrt_provider_options.h | 3 +- .../core/session/onnxruntime_c_api.h | 1 + .../tensorrt/tensorrt_execution_provider.cc | 79 ++++++++++++------- .../tensorrt/tensorrt_execution_provider.h | 4 + .../tensorrt_execution_provider_info.cc | 3 +- .../tensorrt_execution_provider_info.h | 1 + .../tensorrt/tensorrt_provider_factory.cc | 2 +- .../core/session/provider_bridge_ort.cc | 2 + onnxruntime/test/perftest/ort_test_session.cc | 10 +++ onnxruntime/test/providers/cpu/model_tests.cc | 2 +- .../providers/tensorrt/tensorrt_basic_test.cc | 3 + onnxruntime/test/util/default_providers.cc | 1 + 12 files changed, 77 insertions(+), 34 deletions(-) diff --git a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h index b2c650179fbb0..ddf390db3d1c3 100644 --- a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h +++ b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h @@ -5,7 +5,7 @@ /// /// Options for the TensorRT provider that are passed to SessionOptionsAppendExecutionProvider_TensorRT_V2. -/// Please note that this struct is *similar* to OrtTensorRTProviderOptions but only to be used internally. +/// Please note that this struct is *similar* to OrtTensorRTProviderOptions but only to be used internally. /// Going forward, new trt provider options are to be supported via this struct and usage of the publicly defined /// OrtTensorRTProviderOptions will be deprecated over time. /// User can only get the instance of OrtTensorRTProviderOptionsV2 via CreateTensorRTProviderOptions. @@ -33,4 +33,5 @@ struct OrtTensorRTProviderOptionsV2 { int trt_layer_norm_fp32_fallback; // force Pow + Reduce ops in layer norm to FP32. Default 0 = false, nonzero = true int trt_timing_cache_enable; // enable TensorRT timing cache. Default 0 = false, nonzero = true int trt_force_timing_cache; // force the TensorRT cache to be used even if device profile does not match. Default 0 = false, nonzero = true + int trt_detailed_build_log; // Enable detailed build step logging on TensorRT EP with timing for each engine build. Default 0 = false, nonzero = true }; diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index 3881154f8d27e..66e2e64f8c434 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -532,6 +532,7 @@ typedef struct OrtTensorRTProviderOptions { int trt_engine_cache_enable; // enable engine caching. Default 0 = false, nonzero = true int trt_timing_cache_enable; // enable TensorRT timing cache. Default 0 = false, nonzero = true int trt_force_timing_cache; // force the TensorRT cache to be used even if device profile does not match. Default 0 = false, nonzero = true + int trt_detailed_build_log; // Enable detailed build step logging on TensorRT EP with timing for each engine build. Default 0 = false, nonzero = true const char* trt_engine_cache_path; // specify engine cache path int trt_engine_decryption_enable; // enable engine decryption. 
Default 0 = false, nonzero = true const char* trt_engine_decryption_lib_path; // specify engine decryption library path diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index 64d88e3c1596b..e962e5ef52047 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -343,6 +343,7 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv dump_subgraphs_ = info.dump_subgraphs; engine_cache_enable_ = info.engine_cache_enable; timing_cache_enable_ = info.timing_cache_enable; + detailed_build_log_ = info.detailed_build_log; if (engine_cache_enable_ || int8_enable_ || timing_cache_enable_) { cache_path_ = info.engine_cache_path; } @@ -422,6 +423,11 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv timing_cache_enable_ = (std::stoi(timing_cache_enable_env) == 0 ? false : true); } + const std::string detailed_build_log_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kDetailedBuildLog); + if (!detailed_build_log_env.empty()) { + detailed_build_log_ = (std::stoi(detailed_build_log_env) == 0 ? false : true); + } + const std::string timing_force_match_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kForceTimingCache); if (!timing_force_match_env.empty()) { force_timing_cache_match_ = (std::stoi(timing_force_match_env) == 0 ? false : true); @@ -1414,9 +1420,12 @@ common::Status TensorrtExecutionProvider::Compile(const std::vectorsetTimingCache(*timing_cache, force_timing_cache_match_); - LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Deserialized timing cache from " + timing_cache_path; + if (detailed_build_log_) { + LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Deserialized timing cache from " + timing_cache_path; + } } // Build engine - auto engine_build_start = std::chrono::steady_clock::now(); + std::chrono::steady_clock::time_point engine_build_start; + if (detailed_build_log_) { + engine_build_start = std::chrono::steady_clock::now(); + } trt_engine = std::unique_ptr(trt_builder->buildEngineWithConfig(*trt_network, *trt_config)); if (trt_engine == nullptr) { return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "TensorRT EP could not build engine for fused node: " + fused_node.Name()); } - auto engine_build_stop = std::chrono::steady_clock::now(); - LOGS_DEFAULT(INFO) << "TensorRT engine build for " << trt_node_name_with_precision << " took: " << - std::chrono::duration_cast(engine_build_stop - engine_build_start).count() << "ms" << std::endl; - + if (detailed_build_log_) { + auto engine_build_stop = std::chrono::steady_clock::now(); + LOGS_DEFAULT(INFO) << "TensorRT engine build for " << trt_node_name_with_precision << " took: " << + std::chrono::duration_cast(engine_build_stop - engine_build_start).count() << "ms" << std::endl; + } if (engine_cache_enable_) { std::unique_ptr serializedModel(trt_engine->serialize()); size_t engine_size = serializedModel->size(); @@ -1512,7 +1527,9 @@ common::Status TensorrtExecutionProvider::Compile(const std::vectornode_name], &tensorrt_mu_, fp16_enable_, int8_enable_, int8_calibration_cache_available_, dla_enable_, dla_core_, &max_workspace_size_, trt_node_name_with_precision, engine_cache_enable_, cache_path_, runtime_.get(), nullptr, allocator_, context_memory_sharing_enable_, &max_ctx_mem_size_, &context_memory_, - dynamic_range_map, engine_decryption_enable_, engine_decryption_, engine_encryption_, 
timing_cache_enable_}; + dynamic_range_map, engine_decryption_enable_, engine_decryption_, engine_encryption_, timing_cache_enable_, + force_timing_cache_match_, detailed_build_log_}; *state = p.release(); return 0; }; @@ -1619,9 +1637,12 @@ common::Status TensorrtExecutionProvider::Compile(const std::vectorengine_cache_path, trt_state->trt_node_name_with_precision); const std::string engine_cache_path = cache_path + ".engine"; const std::string profile_cache_path = cache_path + ".profile"; - cudaDeviceProp prop; - CUDA_CALL_THROW(cudaGetDeviceProperties(&prop, 0)); - const std::string timing_cache_path = GetTimingCachePath(trt_state->engine_cache_path, prop); + std::string timing_cache_path = ""; + if (timing_cache_enable_) { + cudaDeviceProp prop; + CUDA_CALL_THROW(cudaGetDeviceProperties(&prop, device_id_)); + timing_cache_path = GetTimingCachePath(cache_path_, prop); + } if (trt_state->engine_cache_enable && trt_engine == nullptr) { std::ifstream engine_file(engine_cache_path, std::ios::binary | std::ios::in); std::ifstream profile_file(profile_cache_path, std::ios::binary | std::ios::in); @@ -1866,19 +1887,25 @@ common::Status TensorrtExecutionProvider::Compile(const std::vectorsetTimingCache(*timing_cache, force_timing_cache_match_); - LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Deserialized timing cache from " + timing_cache_path; + if (detailed_build_log_) { + LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Deserialized timing cache from " + timing_cache_path; + } } // Build engine { auto lock = GetApiLock(); - auto engine_build_start = std::chrono::steady_clock::now(); + std::chrono::steady_clock::time_point engine_build_start; + if (detailed_build_log_) { + engine_build_start = std::chrono::steady_clock::now(); + } *(trt_state->engine) = std::unique_ptr( trt_builder->buildEngineWithConfig(*trt_state->network->get(), *trt_config)); - auto engine_build_stop = std::chrono::steady_clock::now(); + if (detailed_build_log_) { + auto engine_build_stop = std::chrono::steady_clock::now(); LOGS_DEFAULT(INFO) << "TensorRT engine build for " << trt_state->trt_node_name_with_precision << " took: " << - std::chrono::duration_cast(engine_build_stop - engine_build_start).count() << "ms" << std::endl; - + std::chrono::duration_cast(engine_build_stop - engine_build_start).count() << "ms" << std::endl; + } } if (trt_state->engine == nullptr) { return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "TensorRT EP Failed to Build Engine."); @@ -1914,20 +1941,12 @@ common::Status TensorrtExecutionProvider::Compile(const std::vectortiming_cache_enable) - { - auto timing_cache = trt_config->getTimingCache(); - std::unique_ptr timingCacheHostData{timing_cache->serialize()}; - if (timingCacheHostData == nullptr) { - return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, - "TensorRT EP could not serialize timing cache: " + timing_cache_path); + if (detailed_build_log_) { + LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Serialized timing cache " + timing_cache_path; } - saveTimingCacheFile(timing_cache_path, timingCacheHostData.get()); } + // Build context if (trt_state->context_memory_sharing_enable) { *(trt_state->context) = std::unique_ptr( diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h index b294a7a51d26d..cb87b31e01b96 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h @@ -32,6 +32,7 @@ static const std::string kContextMemorySharingEnable = 
"ORT_TENSORRT_CONTEXT_MEM static const std::string kLayerNormFP32Fallback = "ORT_TENSORRT_LAYER_NORM_FP32_FALLBACK"; static const std::string kTimingCacheEnable = "ORT_TENSORRT_TIMING_CACHE_ENABLE"; static const std::string kForceTimingCache = "ORT_TENSORRT_FORCE_TIMING_CACHE_ENABLE"; +static const std::string kDetailedBuildLog = "ORT_TENSORRT_DETAILED_BUILD_LOG_ENABLE"; // Old env variable for backward compatibility static const std::string kEngineCachePath = "ORT_TENSORRT_ENGINE_CACHE_PATH"; } // namespace tensorrt_env_vars @@ -117,6 +118,8 @@ struct TensorrtFuncState { int (*engine_decryption)(const char*, char*, size_t*) = nullptr; int (*engine_encryption)(const char*, char*, size_t) = nullptr; bool timing_cache_enable = true; + bool force_timing_cache = false; + bool detailed_build_log = false; }; // Logical device representation. @@ -181,6 +184,7 @@ class TensorrtExecutionProvider : public IExecutionProvider { int (*engine_encryption_)(const char*, char*, size_t) = nullptr; bool timing_cache_enable_ = false; bool force_timing_cache_match_ = false; + bool detailed_build_log_ = false; std::unordered_set control_flow_op_set_ = {"If", "Loop", "Scan"}; std::unordered_map> parsers_; diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc index 5389edd4af5c8..ae06e6ce1338d 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc @@ -32,6 +32,7 @@ constexpr const char* kContextMemorySharingEnable = "trt_context_memory_sharing_ constexpr const char* kLayerNormFP32Fallback = "trt_layer_norm_fp32_fallback"; constexpr const char* kTimingCacheEnable = "trt_timing_cache_enable"; constexpr const char* kForceTimingCacheMatch = "trt_force_timing_cache_match"; +constexpr const char* kDetailedBuildLog = "trt_detailed_build_log"; } // namespace provider_option_names } // namespace tensorrt @@ -64,7 +65,7 @@ TensorrtExecutionProviderInfo TensorrtExecutionProviderInfo::FromProviderOptions .AddAssignmentToReference(tensorrt::provider_option_names::kEngineCacheEnable, info.engine_cache_enable) .AddAssignmentToReference(tensorrt::provider_option_names::kCachePath, info.engine_cache_path) .AddAssignmentToReference(tensorrt::provider_option_names::kDecryptionEnable, info.engine_decryption_enable) - .AddAssignmentToReference(tensorrt::provider_option_names::kDecryptionLibPath, info.engine_decryption_lib_path) + .AddAssignmentToReference(tensorrt::provider_option_names::kDecryptionLibPath, info.engine_decryption_lib_path) .AddAssignmentToReference(tensorrt::provider_option_names::kForceSequentialEngineBuild, info.force_sequential_engine_build) .AddAssignmentToReference(tensorrt::provider_option_names::kContextMemorySharingEnable, info.context_memory_sharing_enable) .AddAssignmentToReference(tensorrt::provider_option_names::kLayerNormFP32Fallback, info.layer_norm_fp32_fallback) diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h index bacc94795d69e..1a2e5e01af464 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h @@ -35,6 +35,7 @@ struct TensorrtExecutionProviderInfo { bool layer_norm_fp32_fallback{false}; bool timing_cache_enable{false}; bool force_timing_cache{false}; + bool 
detailed_build_log{false}; static TensorrtExecutionProviderInfo FromProviderOptions(const ProviderOptions& options); static ProviderOptions ToProviderOptions(const TensorrtExecutionProviderInfo& info); diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc index e86730a7e58dd..9b4b8236e0b23 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc @@ -71,7 +71,7 @@ struct Tensorrt_Provider : Provider { info.context_memory_sharing_enable = options.trt_context_memory_sharing_enable != 0; info.layer_norm_fp32_fallback = options.trt_layer_norm_fp32_fallback != 0; info.timing_cache_enable = options.trt_timing_cache_enable != 0; - info.force_timing_cache = options.trt_force_timing_cache != 0; + info.detailed_build_log = options.trt_detailed_build_log != 0; return std::make_shared(info); } diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index ef7f20d1b7d96..8e70dd24ac10a 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1277,6 +1277,7 @@ OrtTensorRTProviderOptionsV2 OrtTensorRTProviderOptionsToOrtTensorRTProviderOpti // Use default value as this field is not available in OrtTensorRTProviderOptions trt_options_converted.trt_timing_cache_enable = 0; trt_options_converted.trt_force_timing_cache = 0; + trt_options_converted.trt_detailed_build_log = 0; trt_options_converted.trt_context_memory_sharing_enable = 0; trt_options_converted.trt_layer_norm_fp32_fallback = 0; return trt_options_converted; @@ -1605,6 +1606,7 @@ ORT_API_STATUS_IMPL(OrtApis::CreateTensorRTProviderOptions, _Outptr_ OrtTensorRT (*out)->trt_layer_norm_fp32_fallback = false; (*out)->trt_timing_cache_enable = false; (*out)->trt_force_timing_cache = false; + (*out)->trt_detailed_build_log = false; return nullptr; #else ORT_UNUSED_PARAMETER(out); diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index 4ff66c6a2067b..552274b77bbfb 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -123,6 +123,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device bool trt_layer_norm_fp32_fallback = false; bool trt_timing_cache_enable = false; bool trt_force_timing_cache = false; + bool trt_detailed_build_log = false; #ifdef _MSC_VER std::string ov_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string); @@ -286,6 +287,14 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device } else { ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_force_timing_cache' should be a boolean i.e. true or false. Default value is false.\n"); } + } else if (key == "trt_detailed_build_log") { + if (value == "true" || value == "True") { + trt_detailed_build_log = true; + } else if (value == "false" || value == "False") { + trt_detailed_build_log = false; + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_detailed_build_log' should be a boolean i.e. true or false. Default value is false.\n"); + } } else { ORT_THROW("[ERROR] [TensorRT] wrong key type entered. Choose from the following runtime key options that are available for TensorRT. 
['device_id', 'trt_max_partition_iterations', 'trt_min_subgraph_size', 'trt_max_workspace_size', 'trt_fp16_enable', 'trt_int8_enable', 'trt_int8_calibration_table_name', 'trt_int8_use_native_calibration_table', 'trt_dla_enable', 'trt_dla_core', 'trt_dump_subgraphs', 'trt_engine_cache_enable', 'trt_engine_cache_path', 'trt_engine_decryption_enable', 'trt_engine_decryption_lib_path', 'trt_force_sequential_engine_build', 'trt_context_memory_sharing_enable', 'trt_layer_norm_fp32_fallback'] \n"); } @@ -313,6 +322,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device tensorrt_options.trt_layer_norm_fp32_fallback = trt_layer_norm_fp32_fallback; tensorrt_options.trt_timing_cache_enable = trt_timing_cache_enable; tensorrt_options.trt_force_timing_cache = trt_force_timing_cache; + tensorrt_options.trt_detailed_build_log = trt_detailed_build_log; session_options.AppendExecutionProvider_TensorRT_V2(tensorrt_options); OrtCUDAProviderOptions cuda_options; diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc index 768abbc316bac..1da491fe4f9b0 100644 --- a/onnxruntime/test/providers/cpu/model_tests.cc +++ b/onnxruntime/test/providers/cpu/model_tests.cc @@ -683,7 +683,7 @@ TEST_P(ModelTest, Run) { if (test_case_name.find(ORT_TSTR("FLOAT16")) != std::string::npos) { OrtTensorRTProviderOptionsV2 params{0, 0, nullptr, 1000, 1, 1 << 30, 1, // enable fp16 - 0, nullptr, 0, 0, 0, 0, 0, nullptr, 0, nullptr, 0, 0, 0, 0, 0}; + 0, nullptr, 0, 0, 0, 0, 0, nullptr, 0, nullptr, 0, 0, 0, 0, 0, 0}; ortso.AppendExecutionProvider_TensorRT_V2(params); } else { OrtTensorRTProviderOptionsV2* ep_option = nullptr; diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index df9a0bd190881..2d38bf7b4b3ba 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ -155,6 +155,7 @@ void RunWithOneSessionSingleThreadInference(std::string model_name, std::string 0, 0, 0, + 0, 0}; params.trt_engine_cache_enable = 1; @@ -228,6 +229,7 @@ void RunWithOneSessionMultiThreadsInference(std::string model_name, std::string 0, 0, 0, + 0, 0}; params.trt_engine_cache_enable = 1; @@ -394,6 +396,7 @@ TEST_P(TensorrtExecutionProviderCacheTest, Run) { 0, 0, 0, + 0, 0}; if (cache_type.compare("engine") == 0) { diff --git a/onnxruntime/test/util/default_providers.cc b/onnxruntime/test/util/default_providers.cc index d68d2512f32cf..55db973b6fce8 100644 --- a/onnxruntime/test/util/default_providers.cc +++ b/onnxruntime/test/util/default_providers.cc @@ -41,6 +41,7 @@ std::unique_ptr DefaultTensorrtExecutionProvider() { 0, 0, 0, + 0, nullptr, 0, nullptr, From 6b0be1db5fb68f9efe24697959f320219c8cfa09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20M=C3=BCller?= Date: Wed, 1 Mar 2023 18:59:50 +0100 Subject: [PATCH 28/30] format changes and adding force timing cache to provider options --- .../tensorrt/tensorrt_execution_provider.cc | 130 +++++++++--------- 1 file changed, 62 insertions(+), 68 deletions(-) diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index e962e5ef52047..10c67963b85cc 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -118,34 +118,30 @@ bool SetDynamicRange(nvinfer1::INetworkDefinition& 
network, std::unordered_map loadTimingCacheFile(const std::string inFileName) -{ - std::ifstream iFile(inFileName, std::ios::in | std::ios::binary); - if (!iFile) - { - LOGS_DEFAULT(WARNING) << "[TensorRT EP] Could not read timing cache from: " << inFileName - << ". A new timing cache will be generated and written."; - return std::vector(); - } - iFile.seekg(0, std::ifstream::end); - size_t fsize = iFile.tellg(); - iFile.seekg(0, std::ifstream::beg); - std::vector content(fsize); - iFile.read(content.data(), fsize); - iFile.close(); - return content; +inline std::vector loadTimingCacheFile(const std::string inFileName) { + std::ifstream iFile(inFileName, std::ios::in | std::ios::binary); + if (!iFile) { + LOGS_DEFAULT(WARNING) << "[TensorRT EP] Could not read timing cache from: " << inFileName + << ". A new timing cache will be generated and written."; + return std::vector(); + } + iFile.seekg(0, std::ifstream::end); + size_t fsize = iFile.tellg(); + iFile.seekg(0, std::ifstream::beg); + std::vector content(fsize); + iFile.read(content.data(), fsize); + iFile.close(); + return content; } -inline void saveTimingCacheFile(const std::string outFileName, const nvinfer1::IHostMemory* blob) -{ - std::ofstream oFile(outFileName, std::ios::out | std::ios::binary); - if (!oFile) - { - LOGS_DEFAULT(WARNING) << "[TensorRT EP] Could not write timing cache to: " << outFileName; - return; - } - oFile.write((char*) blob->data(), blob->size()); - oFile.close(); +inline void saveTimingCacheFile(const std::string outFileName, const nvinfer1::IHostMemory* blob) { + std::ofstream oFile(outFileName, std::ios::out | std::ios::binary); + if (!oFile) { + LOGS_DEFAULT(WARNING) << "[TensorRT EP] Could not write timing cache to: " << outFileName; + return; + } + oFile.write((char*)blob->data(), blob->size()); + oFile.close(); } } // namespace @@ -343,6 +339,7 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv dump_subgraphs_ = info.dump_subgraphs; engine_cache_enable_ = info.engine_cache_enable; timing_cache_enable_ = info.timing_cache_enable; + force_timing_cache_match_ = info.force_timing_cache; detailed_build_log_ = info.detailed_build_log; if (engine_cache_enable_ || int8_enable_ || timing_cache_enable_) { cache_path_ = info.engine_cache_path; @@ -1473,8 +1470,8 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector timing_cache = nullptr; - if (timing_cache_enable_) { + std::unique_ptr timing_cache = nullptr; + if (timing_cache_enable_) { std::vector loaded_timing_cache = loadTimingCacheFile(timing_cache_path); timing_cache.reset(trt_config->createTimingCache(static_cast(loaded_timing_cache.data()), loaded_timing_cache.size())); if (timing_cache == nullptr) { @@ -1485,9 +1482,9 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector(engine_build_stop - engine_build_start).count() << "ms" << std::endl; + std::chrono::duration_cast(engine_build_stop - engine_build_start).count() << "ms" << std::endl; } if (engine_cache_enable_) { std::unique_ptr serializedModel(trt_engine->serialize()); @@ -1518,18 +1515,17 @@ common::Status TensorrtExecutionProvider::Compile(const std::vectorgetTimingCache(); - std::unique_ptr timingCacheHostData{timing_cache->serialize()}; - if (timingCacheHostData == nullptr) { - return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, - "TensorRT EP could not serialize timing cache: " + timing_cache_path); - } - saveTimingCacheFile(timing_cache_path, timingCacheHostData.get()); - if (detailed_build_log_) { - LOGS_DEFAULT(VERBOSE) << 
"[TensorRT EP] Serialized timing cache " + timing_cache_path; - } + if (timing_cache_enable_) { + auto timing_cache = trt_config->getTimingCache(); + std::unique_ptr timingCacheHostData{timing_cache->serialize()}; + if (timingCacheHostData == nullptr) { + return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, + "TensorRT EP could not serialize timing cache: " + timing_cache_path); + } + saveTimingCacheFile(timing_cache_path, timingCacheHostData.get()); + if (detailed_build_log_) { + LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Serialized timing cache " + timing_cache_path; + } } } } @@ -1880,16 +1876,16 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector timing_cache = nullptr; if (trt_state->timing_cache_enable) { - std::vector loaded_timing_cache = loadTimingCacheFile(timing_cache_path); - timing_cache.reset(trt_config->createTimingCache(static_cast(loaded_timing_cache.data()), loaded_timing_cache.size())); - if (timing_cache == nullptr) { - return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, - "TensorRT EP could not create timing cache: " + timing_cache_path); - } - trt_config->setTimingCache(*timing_cache, force_timing_cache_match_); - if (detailed_build_log_) { - LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Deserialized timing cache from " + timing_cache_path; - } + std::vector loaded_timing_cache = loadTimingCacheFile(timing_cache_path); + timing_cache.reset(trt_config->createTimingCache(static_cast(loaded_timing_cache.data()), loaded_timing_cache.size())); + if (timing_cache == nullptr) { + return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, + "TensorRT EP could not create timing cache: " + timing_cache_path); + } + trt_config->setTimingCache(*timing_cache, force_timing_cache_match_); + if (detailed_build_log_) { + LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Deserialized timing cache from " + timing_cache_path; + } } // Build engine @@ -1903,8 +1899,8 @@ common::Status TensorrtExecutionProvider::Compile(const std::vectorbuildEngineWithConfig(*trt_state->network->get(), *trt_config)); if (detailed_build_log_) { auto engine_build_stop = std::chrono::steady_clock::now(); - LOGS_DEFAULT(INFO) << "TensorRT engine build for " << trt_state->trt_node_name_with_precision << " took: " << - std::chrono::duration_cast(engine_build_stop - engine_build_start).count() << "ms" << std::endl; + LOGS_DEFAULT(INFO) << "TensorRT engine build for " << trt_state->trt_node_name_with_precision << " took: " << + std::chrono::duration_cast(engine_build_stop - engine_build_start).count() << "ms" << std::endl; } } if (trt_state->engine == nullptr) { @@ -1932,21 +1928,19 @@ common::Status TensorrtExecutionProvider::Compile(const std::vectortiming_cache_enable) - { - auto timing_cache = trt_config->getTimingCache(); - std::unique_ptr timingCacheHostData{timing_cache->serialize()}; - if (timingCacheHostData == nullptr) { - return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, - "TensorRT EP could not serialize timing cache: " + timing_cache_path); - } - saveTimingCacheFile(timing_cache_path, timingCacheHostData.get()); - if (detailed_build_log_) { - LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Serialized timing cache " + timing_cache_path; - } + if (trt_state->timing_cache_enable) { + auto timing_cache = trt_config->getTimingCache(); + std::unique_ptr timingCacheHostData{timing_cache->serialize()}; + if (timingCacheHostData == nullptr) { + return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, + "TensorRT EP could not serialize timing cache: " + timing_cache_path); + } + saveTimingCacheFile(timing_cache_path, timingCacheHostData.get()); + if 
(detailed_build_log_) { + LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Serialized timing cache " + timing_cache_path; + } } - // Build context if (trt_state->context_memory_sharing_enable) { *(trt_state->context) = std::unique_ptr( From c1c3f712467b74673146e9f3f72c5cdbecd39267 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20M=C3=BCller?= Date: Mon, 6 Mar 2023 09:57:15 +0100 Subject: [PATCH 29/30] fix pybind OrtTensorRTProviderOptionsV2 --- onnxruntime/python/onnxruntime_pybind_state.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index dd24ce51e1111..667073063d4cc 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -366,6 +366,9 @@ std::unique_ptr CreateExecutionProviderInstance( nullptr, 0, 0, + 0, + 0, + 0, 0}; for (auto option : it->second) { if (option.first == "device_id") { From b888fc3ab888b418e270aa35cc2ee823aabf785d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20M=C3=BCller?= Date: Thu, 9 Mar 2023 00:12:27 +0100 Subject: [PATCH 30/30] reset OrtTensorRTProviderOptions --- include/onnxruntime/core/session/onnxruntime_c_api.h | 3 --- onnxruntime/test/util/default_providers.cc | 3 --- 2 files changed, 6 deletions(-) diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index 66e2e64f8c434..09cd8f0f748fc 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -530,9 +530,6 @@ typedef struct OrtTensorRTProviderOptions { int trt_dla_core; // DLA core number. Default 0 int trt_dump_subgraphs; // dump TRT subgraph. Default 0 = false, nonzero = true int trt_engine_cache_enable; // enable engine caching. Default 0 = false, nonzero = true - int trt_timing_cache_enable; // enable TensorRT timing cache. Default 0 = false, nonzero = true - int trt_force_timing_cache; // force the TensorRT cache to be used even if device profile does not match. Default 0 = false, nonzero = true - int trt_detailed_build_log; // Enable detailed build step logging on TensorRT EP with timing for each engine build. Default 0 = false, nonzero = true const char* trt_engine_cache_path; // specify engine cache path int trt_engine_decryption_enable; // enable engine decryption. Default 0 = false, nonzero = true const char* trt_engine_decryption_lib_path; // specify engine decryption library path diff --git a/onnxruntime/test/util/default_providers.cc b/onnxruntime/test/util/default_providers.cc index 55db973b6fce8..333203085ae77 100644 --- a/onnxruntime/test/util/default_providers.cc +++ b/onnxruntime/test/util/default_providers.cc @@ -39,9 +39,6 @@ std::unique_ptr DefaultTensorrtExecutionProvider() { 0, 0, 0, - 0, - 0, - 0, nullptr, 0, nullptr,
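
Taken together, the series exposes three new TensorRT EP switches: the timing cache itself, forced reuse of a timing cache recorded on a mismatching device, and a detailed per-engine build log. The snippet below is a minimal usage sketch, assuming a build that already contains these patches; the option keys are the ones registered in tensorrt_execution_provider_info.cc above, and the cache directory is a placeholder.

// Sketch: enable the TensorRT timing cache, forced timing-cache reuse, and
// detailed build logging through the OrtTensorRTProviderOptionsV2 key/value
// interface (assumes this patch series is applied; paths are placeholders).
#include <cstdio>
#include <cstdlib>
#include "onnxruntime_c_api.h"

static void Check(const OrtApi* api, OrtStatus* status) {
  if (status != nullptr) {
    std::fprintf(stderr, "%s\n", api->GetErrorMessage(status));
    api->ReleaseStatus(status);
    std::exit(1);
  }
}

int main() {
  const OrtApi* api = OrtGetApiBase()->GetApi(ORT_API_VERSION);

  OrtSessionOptions* session_options = nullptr;
  Check(api, api->CreateSessionOptions(&session_options));

  OrtTensorRTProviderOptionsV2* trt_options = nullptr;
  Check(api, api->CreateTensorRTProviderOptions(&trt_options));

  // Keys as registered in tensorrt_execution_provider_info.cc by this series.
  const char* const keys[] = {"trt_engine_cache_enable", "trt_engine_cache_path",
                              "trt_timing_cache_enable", "trt_force_timing_cache_match",
                              "trt_detailed_build_log"};
  const char* const values[] = {"1", "/tmp/ort_trt_cache", "1", "1", "1"};
  Check(api, api->UpdateTensorRTProviderOptions(trt_options, keys, values, 5));

  Check(api, api->SessionOptionsAppendExecutionProvider_TensorRT_V2(session_options, trt_options));

  // ... create the OrtSession from a model and run inference as usual ...

  api->ReleaseTensorRTProviderOptions(trt_options);
  api->ReleaseSessionOptions(session_options);
  return 0;
}

The forced-reuse flag ends up as the ignoreMismatch argument of nvinfer1::IBuilderConfig::setTimingCache, so a cache serialized on a different device can still be deserialized. The bundled perf test accepts the same switches (trt_timing_cache_enable, trt_force_timing_cache, trt_detailed_build_log) through the runtime-option parsing added to ort_test_session.cc above.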
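
GetTimingCachePath folds the GPU compute capability into the timing-cache file name because a TensorRT timing cache is only reusable on devices of the same capability; naming the file per capability keeps a stale cache from being fed back to TensorRT. Below is a standalone sketch of that naming scheme; the directory is a placeholder and the path is assembled directly rather than through GetCachePath.

// Sketch of the compute-capability-based timing cache file name used above.
// On an sm_80 device this prints ".../TensorrtExecutionProvider_cache_cc80.timing".
#include <cuda_runtime_api.h>
#include <iostream>
#include <string>

int main() {
  cudaDeviceProp prop{};
  if (cudaGetDeviceProperties(&prop, /*device=*/0) != cudaSuccess) {
    std::cerr << "no CUDA device visible\n";
    return 1;
  }
  const std::string root = "/tmp/ort_trt_cache";  // placeholder cache directory
  const std::string timing_cache_name =
      "TensorrtExecutionProvider_cache_cc" +
      std::to_string(prop.major * 10 + prop.minor) + ".timing";
  std::cout << root + "/" + timing_cache_name << "\n";
  return 0;
}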
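
Patch 26 reports the warm-up run separately because, with engine and timing caches enabled, the first inference is where cache (de)serialization and any engine build actually happen. A minimal sketch of that measurement follows, with the Run call stubbed out as a placeholder.

// Sketch: time the first (warm-up) inference separately, mirroring the
// "First inference time cost" line added to the perf runner output.
#include <chrono>
#include <iostream>

template <typename Fn>
double TimeFirstInferenceMs(Fn&& run_once) {
  const auto start = std::chrono::high_resolution_clock::now();
  run_once();  // e.g. a lambda that calls session.Run(...) exactly once
  const auto end = std::chrono::high_resolution_clock::now();
  return std::chrono::duration<double, std::milli>(end - start).count();
}

int main() {
  const double ms = TimeFirstInferenceMs([] { /* session.Run(...) goes here */ });
  std::cout << "First inference time cost: " << ms << " ms\n";
  return 0;
}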