From 3ba51a231386276f819393f88db4dab62afe6db3 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Thu, 23 Dec 2021 19:19:44 +0000 Subject: [PATCH 01/30] add timing cache --- .../tensorrt/tensorrt_execution_provider.cc | 103 +++++++++++++++++- .../tensorrt/tensorrt_execution_provider.h | 3 + 2 files changed, 105 insertions(+), 1 deletion(-) diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index 3f0751151fd2a..a2df547c5869b 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -19,6 +19,9 @@ #include #include #include +#include +#include +#include #include "flatbuffers/idl.h" #include "ort_trt_int8_cal_table.fbs.h" @@ -264,6 +267,36 @@ bool SetDynamicRange(nvinfer1::INetworkDefinition& network, std::unordered_map loadTimingCacheFile(const std::string inFileName) +{ + std::ifstream iFile(inFileName, std::ios::in | std::ios::binary); + if (!iFile) + { + LOGS_DEFAULT(WARNING) << "[TensorRT EP] Could not read timing cache from: " << inFileName + << ". A new timing cache will be generated and written."; + return std::vector(); + } + iFile.seekg(0, std::ifstream::end); + size_t fsize = iFile.tellg(); + iFile.seekg(0, std::ifstream::beg); + std::vector content(fsize); + iFile.read(content.data(), fsize); + iFile.close(); + return content; +} + +inline void saveTimingCacheFile(const std::string outFileName, const nvinfer1::IHostMemory* blob) +{ + std::ofstream oFile(outFileName, std::ios::out | std::ios::binary); + if (!oFile) + { + LOGS_DEFAULT(WARNING) << "[TensorRT EP] Could not write timing cache to: " << outFileName; + return; + } + oFile.write((char*) blob->data(), blob->size()); + oFile.close(); +} } // namespace namespace google { @@ -519,6 +552,11 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv if (!force_sequential_engine_build_env.empty()) { force_sequential_engine_build_ = (std::stoi(force_sequential_engine_build_env) == 0 ? false : true); } + + const std::string timing_cache_enable_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kTimingCacheEnable); + if (!timing_cache_enable_env.empty()) { + timing_cache_enable_ = (std::stoi(timing_cache_enable_env) == 0 ? false : true); + } } // Validate setting @@ -1289,6 +1327,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector& fuse if (!has_dynamic_shape) { const std::string cache_path = GetCachePath(cache_path_, trt_node_name_with_precision); const std::string engine_cache_path = cache_path + ".engine"; + const std::string timing_cache_path = cache_path + ".timing"; std::ifstream engine_file(engine_cache_path, std::ios::binary | std::ios::in); if (engine_cache_enable_ && engine_file) { engine_file.seekg(0, std::ios::end); @@ -1331,10 +1370,29 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector& fuse } } + LOGS_DEFAULT(WARNING) << timing_cache_enable_; + LOGS_DEFAULT(WARNING) << timing_cache_path; + + // Load timing cache from file. 
Create a fresh cache if the file doesn't exist + std::unique_ptr timing_cache = nullptr; + if (timing_cache_enable_) { + std::vector loaded_timing_cache = loadTimingCacheFile(timing_cache_path); + timing_cache.reset(trt_config->createTimingCache(static_cast(loaded_timing_cache.data()), loaded_timing_cache.size())); + if (timing_cache == nullptr) { + return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, + "TensorRT EP could not create timing cache: " + timing_cache_path); + } + trt_config->setTimingCache(*timing_cache, false); + } + // Build engine { auto lock = GetEngineBuildLock(); + auto start = std::chrono::high_resolution_clock::now(); trt_engine = tensorrt_ptr::unique_pointer(trt_builder->buildEngineWithConfig(*trt_network, *trt_config)); + auto end = std::chrono::high_resolution_clock::now(); + std::chrono::duration duration = end - start; + LOGS_DEFAULT(WARNING) << "Elapsed time (in Compile) in milliseconds: " << duration.count(); } if (trt_engine == nullptr) { return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, @@ -1356,6 +1414,18 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector& fuse serializedModel->destroy(); LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Serialized " + engine_cache_path; } + + // serialize and save timing cache + if (timing_cache_enable_) + { + auto timing_cache = trt_config->getTimingCache(); + std::unique_ptr timingCacheHostData{timing_cache->serialize()}; + if (timingCacheHostData == nullptr) { + return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, + "TensorRT EP could not serialize timing cache: " + timing_cache_path); + } + saveTimingCacheFile(timing_cache_path, timingCacheHostData.get()); + } } // Build context @@ -1409,7 +1479,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector& fuse &networks_[context->node_name], input_info_[context->node_name], output_info_[context->node_name], input_shape_ranges_[context->node_name], &tensorrt_mu_, fp16_enable_, int8_enable_, int8_calibration_cache_available_, dla_enable_, dla_core_, &max_workspace_size_, trt_node_name_with_precision, engine_cache_enable_, cache_path_, - runtime_.get(), nullptr, allocator_, dynamic_range_map, engine_decryption_enable_, engine_decryption_, engine_encryption_}; + runtime_.get(), nullptr, allocator_, dynamic_range_map, engine_decryption_enable_, engine_decryption_, engine_encryption_, timing_cache_enable_}; *state = p.release(); return 0; }; @@ -1445,6 +1515,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector& fuse const std::string cache_path = GetCachePath(trt_state->engine_cache_path, trt_state->trt_node_name_with_precision); const std::string engine_cache_path = cache_path + ".engine"; const std::string profile_cache_path = cache_path + ".profile"; + const std::string timing_cache_path = cache_path + ".timing"; if (trt_state->engine_cache_enable && trt_engine == nullptr) { std::ifstream engine_file(engine_cache_path, std::ios::binary | std::ios::in); std::ifstream profile_file(profile_cache_path, std::ios::binary | std::ios::in); @@ -1672,11 +1743,29 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector& fuse trt_config->setDLACore(trt_state->dla_core); } + LOGS_DEFAULT(WARNING) << timing_cache_enable_; + LOGS_DEFAULT(WARNING) << timing_cache_path; + // Load timing cache from file. 
Create a fresh cache if the file doesn't exist + std::unique_ptr timing_cache = nullptr; + if (trt_state->timing_cache_enable) { + std::vector loaded_timing_cache = loadTimingCacheFile(timing_cache_path); + timing_cache.reset(trt_config->createTimingCache(static_cast(loaded_timing_cache.data()), loaded_timing_cache.size())); + if (timing_cache == nullptr) { + return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, + "TensorRT EP could not create timing cache: " + timing_cache_path); + } + trt_config->setTimingCache(*timing_cache, false); + } + // Build engine { auto lock = GetEngineBuildLock(); + auto start = std::chrono::high_resolution_clock::now(); *(trt_state->engine) = tensorrt_ptr::unique_pointer( trt_builder->buildEngineWithConfig(*trt_state->network->get(), *trt_config)); + auto end = std::chrono::high_resolution_clock::now(); + std::chrono::duration duration = end - start; + LOGS_DEFAULT(WARNING) << "Elapsed time (in compute_func) in milliseconds: " << duration.count(); } if (trt_state->engine == nullptr) { return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "TensorRT EP Failed to Build Engine."); @@ -1703,6 +1792,18 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector& fuse serializedModel->destroy(); } + // serialize and save timing cache + if (trt_state->timing_cache_enable) + { + auto timing_cache = trt_config->getTimingCache(); + std::unique_ptr timingCacheHostData{timing_cache->serialize()}; + if (timingCacheHostData == nullptr) { + return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, + "TensorRT EP could not serialize timing cache: " + timing_cache_path); + } + saveTimingCacheFile(timing_cache_path, timingCacheHostData.get()); + } + // Build context *(trt_state->context) = tensorrt_ptr::unique_pointer( trt_state->engine->get()->createExecutionContext()); diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h index ee759228ae1fa..e8bbd44ea961a 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h @@ -26,6 +26,7 @@ static const std::string kCachePath = "ORT_TENSORRT_CACHE_PATH"; static const std::string kDecryptionEnable = "ORT_TENSORRT_ENGINE_DECRYPTION_ENABLE"; static const std::string kDecryptionLibPath = "ORT_TENSORRT_ENGINE_DECRYPTION_LIB_PATH"; static const std::string kForceSequentialEngineBuild= "ORT_TENSORRT_FORCE_SEQUENTIAL_ENGINE_BUILD"; +static const std::string kTimingCacheEnable = "ORT_TENSORRT_TIMING_CACHE_ENABLE"; // Old env variable for backward compatibility static const std::string kEngineCachePath = "ORT_TENSORRT_ENGINE_CACHE_PATH"; } // namespace tensorrt_env_vars @@ -107,6 +108,7 @@ struct TensorrtFuncState { bool engine_decryption_enable; int (*engine_decryption)(const char*, char*, size_t*); int (*engine_encryption)(const char*, char*, size_t); + bool timing_cache_enable; }; // Logical device representation. 
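Note on usage (an editorial sketch, not part of the diffs above): this commit wires a TensorRT builder timing cache into engine build and gates it behind the new ORT_TENSORRT_TIMING_CACHE_ENABLE environment variable; later commits in this series expose the same switch as trt_timing_cache_enable on OrtTensorRTProviderOptionsV2 together with an AppendExecutionProvider_TensorRT_V2 C++ helper. The sketch below shows one minimal way an application might enable it once those pieces are in place. The helper name MakeTrtSessionOptions and the cache directory value are assumptions for illustration only; the numeric defaults mirror the values used in the perf-test changes later in this series.

    // Minimal sketch: enable the TensorRT timing cache via the V2 provider options.
    // Assumes the OrtTensorRTProviderOptionsV2 struct and the C++ wrapper
    // AppendExecutionProvider_TensorRT_V2 introduced by later commits in this series.
    #include "onnxruntime_cxx_api.h"
    #include "core/providers/tensorrt/tensorrt_provider_options.h"

    Ort::SessionOptions MakeTrtSessionOptions() {
      OrtTensorRTProviderOptionsV2 trt_options{};       // zero-initialize: pointers null, flags off
      trt_options.device_id = 0;
      trt_options.trt_max_partition_iterations = 1000;  // defaults mirrored from the perf-test setup
      trt_options.trt_min_subgraph_size = 1;
      trt_options.trt_max_workspace_size = 1 << 30;
      trt_options.trt_engine_cache_enable = 1;           // reuse serialized engines across runs
      trt_options.trt_engine_cache_path = "./trt_cache"; // illustrative cache directory
      trt_options.trt_timing_cache_enable = 1;           // the new option added by this series

      Ort::SessionOptions session_options;
      session_options.AppendExecutionProvider_TensorRT_V2(trt_options);
      return session_options;
    }

When the option is on, the provider serializes the cache next to the engine cache as "<node name with precision>.timing" under the configured cache path and reloads it on later builds, so only the first engine build pays the full kernel-timing cost.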
@@ -167,6 +169,7 @@ class TensorrtExecutionProvider : public IExecutionProvider { bool engine_decryption_enable_ = false; int (*engine_decryption_)(const char*, char*, size_t*); int (*engine_encryption_)(const char*, char*, size_t); + bool timing_cache_enable_ = false; std::unordered_map> parsers_; std::unordered_map> engines_; From f905d355c765f04b24dbd2dc3c1ca0d3c2a23686 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Thu, 6 Jan 2022 18:55:09 +0000 Subject: [PATCH 02/30] enable timing cache for test --- .../core/providers/tensorrt/tensorrt_execution_provider.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h index e8bbd44ea961a..46f90565f88a0 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h @@ -169,7 +169,7 @@ class TensorrtExecutionProvider : public IExecutionProvider { bool engine_decryption_enable_ = false; int (*engine_decryption_)(const char*, char*, size_t*); int (*engine_encryption_)(const char*, char*, size_t); - bool timing_cache_enable_ = false; + bool timing_cache_enable_ = true; std::unordered_map> parsers_; std::unordered_map> engines_; From 086ba0ef87fd269231842cc0c0733936599c6f83 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Mon, 10 Jan 2022 07:34:06 +0000 Subject: [PATCH 03/30] Make it only on Linux --- onnxruntime/test/providers/cpu/model_tests.cc | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc index fbdb421e5fa46..847c48b519fd9 100644 --- a/onnxruntime/test/providers/cpu/model_tests.cc +++ b/onnxruntime/test/providers/cpu/model_tests.cc @@ -990,7 +990,43 @@ ::std::vector<::std::basic_string> GetParameterStrings() { return v; } +auto GenerateCustomTestName = [](const ::testing::TestParamInfo& info) { + // use info.param here to generate the test suffix + std::basic_string name = info.param; + + // the original name here is the combination of provider name and model path name + // remove the trailing 'xxxxxxx/model.onnx' of name + if (name.size() > 11 && name.substr(name.size() - 11) == ORT_TSTR("/model.onnx")) { + name = name.substr(0, info.param.size() - 11); + } + // remove the trailing 'xxxxxx.onnx' of name + else if (name.size() > 5 && name.substr(name.size() - 5) == ORT_TSTR(".onnx")) { + name = name.substr(0, info.param.size() - 5); + } + + // Note: test name only accepts '_' and alphanumeric + // replace '/' with '_' since '_' + std::replace(name.begin(), name.end(), '/', '_'); + + // Note: test name only accepts '_' and alphanumeric + // remove '.' and '-' + char chars[] = ".-"; + for (unsigned int i = 0; i < strlen(chars); ++i) { + name.erase(std::remove(name.begin(), name.end(), chars[i]), name.end()); + } + + return name; +}; + +// The optional last argument is a function or functor that generates custom test name suffixes based on the test parameters. +// Specify the last argument to make test name more meaningful and clear instead of just the sequential number. +#ifdef _WIN32 +// Note: The return value of INSTANTIATE_TEST_SUITE_P accpets std::basic_string. We use wchar_t on Windows and will encounter error. +// So, we don't provide custom test name on Windows now. 
INSTANTIATE_TEST_SUITE_P(ModelTests, ModelTest, testing::ValuesIn(GetParameterStrings())); +#else +INSTANTIATE_TEST_SUITE_P(ModelTests, ModelTest, testing::ValuesIn(GetParameterStrings()), GenerateCustomTestName); +#endif } // namespace test } // namespace onnxruntime From 191424b563a7f020c971498a7a2555ddce84ef5e Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 14 Jan 2022 22:18:05 +0000 Subject: [PATCH 04/30] undo last commit --- onnxruntime/test/providers/cpu/model_tests.cc | 36 ------------------- 1 file changed, 36 deletions(-) diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc index 847c48b519fd9..fbdb421e5fa46 100644 --- a/onnxruntime/test/providers/cpu/model_tests.cc +++ b/onnxruntime/test/providers/cpu/model_tests.cc @@ -990,43 +990,7 @@ ::std::vector<::std::basic_string> GetParameterStrings() { return v; } -auto GenerateCustomTestName = [](const ::testing::TestParamInfo& info) { - // use info.param here to generate the test suffix - std::basic_string name = info.param; - - // the original name here is the combination of provider name and model path name - // remove the trailing 'xxxxxxx/model.onnx' of name - if (name.size() > 11 && name.substr(name.size() - 11) == ORT_TSTR("/model.onnx")) { - name = name.substr(0, info.param.size() - 11); - } - // remove the trailing 'xxxxxx.onnx' of name - else if (name.size() > 5 && name.substr(name.size() - 5) == ORT_TSTR(".onnx")) { - name = name.substr(0, info.param.size() - 5); - } - - // Note: test name only accepts '_' and alphanumeric - // replace '/' with '_' since '_' - std::replace(name.begin(), name.end(), '/', '_'); - - // Note: test name only accepts '_' and alphanumeric - // remove '.' and '-' - char chars[] = ".-"; - for (unsigned int i = 0; i < strlen(chars); ++i) { - name.erase(std::remove(name.begin(), name.end(), chars[i]), name.end()); - } - - return name; -}; - -// The optional last argument is a function or functor that generates custom test name suffixes based on the test parameters. -// Specify the last argument to make test name more meaningful and clear instead of just the sequential number. -#ifdef _WIN32 -// Note: The return value of INSTANTIATE_TEST_SUITE_P accpets std::basic_string. We use wchar_t on Windows and will encounter error. -// So, we don't provide custom test name on Windows now. 
INSTANTIATE_TEST_SUITE_P(ModelTests, ModelTest, testing::ValuesIn(GetParameterStrings())); -#else -INSTANTIATE_TEST_SUITE_P(ModelTests, ModelTest, testing::ValuesIn(GetParameterStrings()), GenerateCustomTestName); -#endif } // namespace test } // namespace onnxruntime From 4fe5a0a25f5cf47f728da5050607a2966351dda7 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 14 Jan 2022 23:57:51 +0000 Subject: [PATCH 05/30] add 'timing_cache_enable' tensorrt provider options --- .../tensorrt/tensorrt_provider_options.h | 5 +++- .../tensorrt_execution_provider_info.cc | 3 ++ .../tensorrt_execution_provider_info.h | 1 + .../tensorrt/tensorrt_provider_factory.cc | 6 ++-- .../core/session/provider_bridge_ort.cc | 30 +++++++++++++++++++ 5 files changed, 42 insertions(+), 3 deletions(-) diff --git a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h index 43b0b938f130b..9e063fc2d015e 100644 --- a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h +++ b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h @@ -5,7 +5,9 @@ /// /// Options for the TensorRT provider that are passed to SessionOptionsAppendExecutionProvider_TensorRT_V2. -/// Please note that this struct is identical to OrtTensorRTProviderOptions but only to be used internally. +/// Please note that this struct is *similar* to OrtTensorRTProviderOptions but only to be used internally. +/// Going forward, new trt provider options are to be supported via this struct and usage of the publicly defined +/// OrtTensorRTProviderOptions will be deprecated over time. /// User can only get the instance of OrtTensorRTProviderOptionsV2 via CreateTensorRTProviderOptions. /// struct OrtTensorRTProviderOptionsV2 { @@ -27,4 +29,5 @@ struct OrtTensorRTProviderOptionsV2 { int trt_engine_decryption_enable; // enable engine decryption. Default 0 = false, nonzero = true const char* trt_engine_decryption_lib_path; // specify engine decryption library path int trt_force_sequential_engine_build; // force building TensorRT engine sequentially. Default 0 = false, nonzero = true + int trt_timing_cache_enable; // enable TensorRT timing cache. 
Default 0 = false, nonzero = true }; diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc index cfc43350a210e..97b4466372870 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc @@ -27,6 +27,7 @@ constexpr const char* kCachePath = "trt_engine_cache_path"; constexpr const char* kDecryptionEnable = "trt_engine_decryption_enable"; constexpr const char* kDecryptionLibPath = "trt_engine_decryption_lib_path"; constexpr const char* kForceSequentialEngineBuild = "trt_force_sequential_engine_build"; +constexpr const char* kTimingCacheEnable = "trt_timing_cache_enable"; } // namespace provider_option_names } // namespace tensorrt @@ -63,6 +64,7 @@ TensorrtExecutionProviderInfo TensorrtExecutionProviderInfo::FromProviderOptions .AddAssignmentToReference(tensorrt::provider_option_names::kDecryptionEnable, info.engine_decryption_enable) .AddAssignmentToReference(tensorrt::provider_option_names::kDecryptionLibPath, info.engine_decryption_lib_path) .AddAssignmentToReference(tensorrt::provider_option_names::kForceSequentialEngineBuild, info.force_sequential_engine_build) + .AddAssignmentToReference(tensorrt::provider_option_names::kTimingCacheEnable, info.timing_cache_enable) .Parse(options)); return info; @@ -87,6 +89,7 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const TensorrtE {tensorrt::provider_option_names::kDecryptionEnable, MakeStringWithClassicLocale(info.engine_decryption_enable)}, {tensorrt::provider_option_names::kDecryptionLibPath, MakeStringWithClassicLocale(info.engine_decryption_lib_path)}, {tensorrt::provider_option_names::kForceSequentialEngineBuild, MakeStringWithClassicLocale(info.force_sequential_engine_build)}, + {tensorrt::provider_option_names::kTimingCacheEnable, MakeStringWithClassicLocale(info.timing_cache_enable)}, }; return options; } diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h index b6d879bf72558..e1bee9ec6f5ce 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h @@ -31,6 +31,7 @@ struct TensorrtExecutionProviderInfo { bool engine_decryption_enable{false}; std::string engine_decryption_lib_path{""}; bool force_sequential_engine_build{false}; + bool timing_cache_enable{false}; static TensorrtExecutionProviderInfo FromProviderOptions(const ProviderOptions& options); static ProviderOptions ToProviderOptions(const TensorrtExecutionProviderInfo& info); diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc index d65c91d88f60d..10ab50ad4f0f2 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc @@ -48,7 +48,7 @@ struct Tensorrt_Provider : Provider { } std::shared_ptr CreateExecutionProviderFactory(const void* provider_options) override { - auto& options = *reinterpret_cast(provider_options); + auto& options = *reinterpret_cast(provider_options); TensorrtExecutionProviderInfo info; info.device_id = options.device_id; info.has_user_compute_stream = options.has_user_compute_stream != 0; @@ -69,12 +69,13 @@ struct Tensorrt_Provider : Provider { 
info.engine_decryption_enable = options.trt_engine_decryption_enable != 0; info.engine_decryption_lib_path = options.trt_engine_decryption_lib_path == nullptr ? "" : options.trt_engine_decryption_lib_path; info.force_sequential_engine_build = options.trt_force_sequential_engine_build != 0; + info.timing_cache_enable = options.trt_timing_cache_enable; return std::make_shared(info); } void UpdateProviderOptions(void* provider_options, const ProviderOptions& options) override { auto internal_options = onnxruntime::TensorrtExecutionProviderInfo::FromProviderOptions(options); - auto& trt_options = *reinterpret_cast(provider_options); + auto& trt_options = *reinterpret_cast(provider_options); trt_options.device_id = internal_options.device_id; trt_options.trt_max_partition_iterations = internal_options.max_partition_iterations; trt_options.trt_min_subgraph_size = internal_options.min_subgraph_size; @@ -134,6 +135,7 @@ struct Tensorrt_Provider : Provider { } trt_options.trt_force_sequential_engine_build = internal_options.force_sequential_engine_build; + trt_options.trt_timing_cache_enable = internal_options.timing_cache_enable; } ProviderOptions GetProviderOptions(const void* provider_options) override { diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 36ebf32f0499f..b2e69bb4c94f3 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1155,7 +1155,36 @@ std::shared_ptr CreateExecutionProviderFactory_MIGrap return nullptr; } +// Adapter to convert the legacy OrtTensorRTProviderOptions to the latest OrtTensorRTProviderOptionsV2 +OrtTensorRTProviderOptionsV2 OrtTensorRTProviderOptionsToOrtTensorRTProviderOptionsV2(const OrtTensorRTProviderOptions* legacy_trt_options) { + OrtTensorRTProviderOptionsV2 trt_options_converted; + + trt_options_converted.device_id = legacy_trt_options->device_id; + trt_options_converted.has_user_compute_stream = legacy_trt_options->has_user_compute_stream; + trt_options_converted.user_compute_stream = legacy_trt_options->user_compute_stream; + trt_options_converted.trt_max_partition_iterations = legacy_trt_options->trt_max_partition_iterations; + trt_options_converted.trt_min_subgraph_size = legacy_trt_options->trt_min_subgraph_size; + trt_options_converted.trt_max_workspace_size = legacy_trt_options->trt_max_workspace_size; + trt_options_converted.trt_fp16_enable = legacy_trt_options->trt_fp16_enable; + trt_options_converted.trt_int8_enable = legacy_trt_options->trt_int8_enable; + trt_options_converted.trt_int8_calibration_table_name = legacy_trt_options->trt_int8_calibration_table_name; + trt_options_converted.trt_int8_use_native_calibration_table = legacy_trt_options->trt_int8_use_native_calibration_table; + trt_options_converted.trt_dla_enable = legacy_trt_options->trt_dla_enable; + trt_options_converted.trt_dla_core = legacy_trt_options->trt_dla_core; + trt_options_converted.trt_dump_subgraphs = legacy_trt_options->trt_dump_subgraphs; + trt_options_converted.trt_engine_cache_enable = legacy_trt_options->trt_engine_cache_enable; + trt_options_converted.trt_engine_cache_path = legacy_trt_options->trt_engine_cache_path; + trt_options_converted.trt_engine_decryption_enable = legacy_trt_options->trt_engine_decryption_enable; + trt_options_converted.trt_engine_decryption_lib_path = legacy_trt_options->trt_engine_decryption_lib_path; + trt_options_converted.trt_force_sequential_engine_build = 
legacy_trt_options->trt_force_sequential_engine_build; + // Use default value as this field is not available in OrtTensorRTProviderOptionsV2 + trt_options_converted.trt_timing_cache_enalbed = 0; + + return trt_options_converted; +} + std::shared_ptr CreateExecutionProviderFactory_Tensorrt(const OrtTensorRTProviderOptions* provider_options) { + OrtTensorRTProviderOptionsV2 trt_options_converted = onnxruntime::OrtTensorRTProviderOptionsToOrtTensorRTProviderOptionsV2(provider_options); if (auto* provider = s_library_tensorrt.Get()) return provider->CreateExecutionProviderFactory(provider_options); @@ -1466,6 +1495,7 @@ ORT_API_STATUS_IMPL(OrtApis::CreateTensorRTProviderOptions, _Outptr_ OrtTensorRT (*out)->trt_engine_decryption_enable = false; (*out)->trt_engine_decryption_lib_path = nullptr; (*out)->trt_force_sequential_engine_build = false; + (*out)->trt_timing_cache_enable = false; return nullptr; #else ORT_UNUSED_PARAMETER(out); From 83e251d68d7f9260ebda428efc67299a89d027b7 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Sat, 15 Jan 2022 00:05:00 +0000 Subject: [PATCH 06/30] fix bug --- onnxruntime/core/session/provider_bridge_ort.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index b2e69bb4c94f3..9ec828d28cd01 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1178,7 +1178,7 @@ OrtTensorRTProviderOptionsV2 OrtTensorRTProviderOptionsToOrtTensorRTProviderOpti trt_options_converted.trt_engine_decryption_lib_path = legacy_trt_options->trt_engine_decryption_lib_path; trt_options_converted.trt_force_sequential_engine_build = legacy_trt_options->trt_force_sequential_engine_build; // Use default value as this field is not available in OrtTensorRTProviderOptionsV2 - trt_options_converted.trt_timing_cache_enalbed = 0; + trt_options_converted.trt_timing_cache_enable = 0; return trt_options_converted; } @@ -1186,7 +1186,7 @@ OrtTensorRTProviderOptionsV2 OrtTensorRTProviderOptionsToOrtTensorRTProviderOpti std::shared_ptr CreateExecutionProviderFactory_Tensorrt(const OrtTensorRTProviderOptions* provider_options) { OrtTensorRTProviderOptionsV2 trt_options_converted = onnxruntime::OrtTensorRTProviderOptionsToOrtTensorRTProviderOptionsV2(provider_options); if (auto* provider = s_library_tensorrt.Get()) - return provider->CreateExecutionProviderFactory(provider_options); + return provider->CreateExecutionProviderFactory(&trt_options_converted); return nullptr; } From 5b52f63e2790050d55ad0610bbc65025ca7ef01d Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Sat, 15 Jan 2022 00:16:33 +0000 Subject: [PATCH 07/30] fix bug --- .../tensorrt/tensorrt_provider_factory.cc | 41 ++++++++++--------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc index 10ab50ad4f0f2..12ec9f51d6eb6 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc @@ -6,6 +6,7 @@ #include #include "tensorrt_execution_provider.h" #include "core/framework/provider_options.h" +#include "core/providers/tensorrt/tensorrt_provider_options.h" #include using namespace onnxruntime; @@ -48,28 +49,28 @@ struct Tensorrt_Provider : Provider { } std::shared_ptr CreateExecutionProviderFactory(const void* provider_options) override { - auto& 
options = *reinterpret_cast(provider_options); + auto options = reinterpret_cast(provider_options); TensorrtExecutionProviderInfo info; - info.device_id = options.device_id; - info.has_user_compute_stream = options.has_user_compute_stream != 0; - info.user_compute_stream = options.user_compute_stream; + info.device_id = options->device_id; + info.has_user_compute_stream = options->has_user_compute_stream != 0; + info.user_compute_stream = options->user_compute_stream; info.has_trt_options = true; - info.max_partition_iterations = options.trt_max_partition_iterations; - info.min_subgraph_size = options.trt_min_subgraph_size; - info.max_workspace_size = options.trt_max_workspace_size; - info.fp16_enable = options.trt_fp16_enable != 0; - info.int8_enable = options.trt_int8_enable != 0; - info.int8_calibration_table_name = options.trt_int8_calibration_table_name == nullptr ? "" : options.trt_int8_calibration_table_name; - info.int8_use_native_calibration_table = options.trt_int8_use_native_calibration_table != 0; - info.dla_enable = options.trt_dla_enable != 0; - info.dla_core = options.trt_dla_core; - info.dump_subgraphs = options.trt_dump_subgraphs != 0; - info.engine_cache_enable = options.trt_engine_cache_enable != 0; - info.engine_cache_path = options.trt_engine_cache_path == nullptr ? "" : options.trt_engine_cache_path; - info.engine_decryption_enable = options.trt_engine_decryption_enable != 0; - info.engine_decryption_lib_path = options.trt_engine_decryption_lib_path == nullptr ? "" : options.trt_engine_decryption_lib_path; - info.force_sequential_engine_build = options.trt_force_sequential_engine_build != 0; - info.timing_cache_enable = options.trt_timing_cache_enable; + info.max_partition_iterations = options->trt_max_partition_iterations; + info.min_subgraph_size = options->trt_min_subgraph_size; + info.max_workspace_size = options->trt_max_workspace_size; + info.fp16_enable = options->trt_fp16_enable != 0; + info.int8_enable = options->trt_int8_enable != 0; + info.int8_calibration_table_name = options->trt_int8_calibration_table_name == nullptr ? "" : options->trt_int8_calibration_table_name; + info.int8_use_native_calibration_table = options->trt_int8_use_native_calibration_table != 0; + info.dla_enable = options->trt_dla_enable != 0; + info.dla_core = options->trt_dla_core; + info.dump_subgraphs = options->trt_dump_subgraphs != 0; + info.engine_cache_enable = options->trt_engine_cache_enable != 0; + info.engine_cache_path = options->trt_engine_cache_path == nullptr ? "" : options->trt_engine_cache_path; + info.engine_decryption_enable = options->trt_engine_decryption_enable != 0; + info.engine_decryption_lib_path = options->trt_engine_decryption_lib_path == nullptr ? 
"" : options->trt_engine_decryption_lib_path; + info.force_sequential_engine_build = options->trt_force_sequential_engine_build != 0; + info.timing_cache_enable = options->trt_timing_cache_enable; return std::make_shared(info); } From 4ff502d482c4c26eac8578f0cb9a8b730881e178 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Sat, 15 Jan 2022 00:19:28 +0000 Subject: [PATCH 08/30] revert modification --- .../tensorrt/tensorrt_provider_factory.cc | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc index 12ec9f51d6eb6..80772436c9e79 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc @@ -49,28 +49,28 @@ struct Tensorrt_Provider : Provider { } std::shared_ptr CreateExecutionProviderFactory(const void* provider_options) override { - auto options = reinterpret_cast(provider_options); + auto& options = *reinterpret_cast(provider_options); TensorrtExecutionProviderInfo info; - info.device_id = options->device_id; - info.has_user_compute_stream = options->has_user_compute_stream != 0; - info.user_compute_stream = options->user_compute_stream; + info.device_id = options.device_id; + info.has_user_compute_stream = options.has_user_compute_stream != 0; + info.user_compute_stream = options.user_compute_stream; info.has_trt_options = true; - info.max_partition_iterations = options->trt_max_partition_iterations; - info.min_subgraph_size = options->trt_min_subgraph_size; - info.max_workspace_size = options->trt_max_workspace_size; - info.fp16_enable = options->trt_fp16_enable != 0; - info.int8_enable = options->trt_int8_enable != 0; - info.int8_calibration_table_name = options->trt_int8_calibration_table_name == nullptr ? "" : options->trt_int8_calibration_table_name; - info.int8_use_native_calibration_table = options->trt_int8_use_native_calibration_table != 0; - info.dla_enable = options->trt_dla_enable != 0; - info.dla_core = options->trt_dla_core; - info.dump_subgraphs = options->trt_dump_subgraphs != 0; - info.engine_cache_enable = options->trt_engine_cache_enable != 0; - info.engine_cache_path = options->trt_engine_cache_path == nullptr ? "" : options->trt_engine_cache_path; - info.engine_decryption_enable = options->trt_engine_decryption_enable != 0; - info.engine_decryption_lib_path = options->trt_engine_decryption_lib_path == nullptr ? "" : options->trt_engine_decryption_lib_path; - info.force_sequential_engine_build = options->trt_force_sequential_engine_build != 0; - info.timing_cache_enable = options->trt_timing_cache_enable; + info.max_partition_iterations = options.trt_max_partition_iterations; + info.min_subgraph_size = options.trt_min_subgraph_size; + info.max_workspace_size = options.trt_max_workspace_size; + info.fp16_enable = options.trt_fp16_enable != 0; + info.int8_enable = options.trt_int8_enable != 0; + info.int8_calibration_table_name = options.trt_int8_calibration_table_name == nullptr ? "" : options.trt_int8_calibration_table_name; + info.int8_use_native_calibration_table = options.trt_int8_use_native_calibration_table != 0; + info.dla_enable = options.trt_dla_enable != 0; + info.dla_core = options.trt_dla_core; + info.dump_subgraphs = options.trt_dump_subgraphs != 0; + info.engine_cache_enable = options.trt_engine_cache_enable != 0; + info.engine_cache_path = options.trt_engine_cache_path == nullptr ? 
"" : options.trt_engine_cache_path; + info.engine_decryption_enable = options.trt_engine_decryption_enable != 0; + info.engine_decryption_lib_path = options.trt_engine_decryption_lib_path == nullptr ? "" : options.trt_engine_decryption_lib_path; + info.force_sequential_engine_build = options.trt_force_sequential_engine_build != 0; + info.timing_cache_enable = options.trt_timing_cache_enable; return std::make_shared(info); } From 2dd319437f2ba4cd86e1c277a2371a336f70687e Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Sat, 15 Jan 2022 00:33:50 +0000 Subject: [PATCH 09/30] small modification --- .../core/providers/tensorrt/tensorrt_execution_provider.cc | 1 + .../core/providers/tensorrt/tensorrt_execution_provider.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index 4e0b35152b549..d1b2e8895759f 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -468,6 +468,7 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv engine_decryption_lib_path_ = info.engine_decryption_lib_path; } force_sequential_engine_build_ = info.force_sequential_engine_build; + timing_cache_enable_ = info.timing_cache_enable; } else { const std::string max_partition_iterations_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kMaxPartitionIterations); if (!max_partition_iterations_env.empty()) { diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h index 46f90565f88a0..e8bbd44ea961a 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h @@ -169,7 +169,7 @@ class TensorrtExecutionProvider : public IExecutionProvider { bool engine_decryption_enable_ = false; int (*engine_decryption_)(const char*, char*, size_t*); int (*engine_encryption_)(const char*, char*, size_t); - bool timing_cache_enable_ = true; + bool timing_cache_enable_ = false; std::unordered_map> parsers_; std::unordered_map> engines_; From 8d75a7094c44b3ed6f604b81d65dfcf8948745de Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Sat, 15 Jan 2022 00:38:27 +0000 Subject: [PATCH 10/30] remove intrumentation code for recording engine build latency --- .../tensorrt/tensorrt_execution_provider.cc | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index d1b2e8895759f..f14b5b418ed57 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -19,9 +19,6 @@ #include #include #include -#include -#include -#include #include "flatbuffers/idl.h" #include "ort_trt_int8_cal_table.fbs.h" @@ -1384,9 +1381,6 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector& fuse } } - LOGS_DEFAULT(WARNING) << timing_cache_enable_; - LOGS_DEFAULT(WARNING) << timing_cache_path; - // Load timing cache from file. 
Create a fresh cache if the file doesn't exist std::unique_ptr timing_cache = nullptr; if (timing_cache_enable_) { @@ -1402,11 +1396,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector& fuse // Build engine { auto lock = GetEngineBuildLock(); - auto start = std::chrono::high_resolution_clock::now(); trt_engine = tensorrt_ptr::unique_pointer(trt_builder->buildEngineWithConfig(*trt_network, *trt_config)); - auto end = std::chrono::high_resolution_clock::now(); - std::chrono::duration duration = end - start; - LOGS_DEFAULT(WARNING) << "Elapsed time (in Compile) in milliseconds: " << duration.count(); } if (trt_engine == nullptr) { return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, @@ -1757,8 +1747,6 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector& fuse trt_config->setDLACore(trt_state->dla_core); } - LOGS_DEFAULT(WARNING) << timing_cache_enable_; - LOGS_DEFAULT(WARNING) << timing_cache_path; // Load timing cache from file. Create a fresh cache if the file doesn't exist std::unique_ptr timing_cache = nullptr; if (trt_state->timing_cache_enable) { @@ -1774,12 +1762,8 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector& fuse // Build engine { auto lock = GetEngineBuildLock(); - auto start = std::chrono::high_resolution_clock::now(); *(trt_state->engine) = tensorrt_ptr::unique_pointer( trt_builder->buildEngineWithConfig(*trt_state->network->get(), *trt_config)); - auto end = std::chrono::high_resolution_clock::now(); - std::chrono::duration duration = end - start; - LOGS_DEFAULT(WARNING) << "Elapsed time (in compute_func) in milliseconds: " << duration.count(); } if (trt_state->engine == nullptr) { return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "TensorRT EP Failed to Build Engine."); From 58e37fc17748578b9edeee6cde21b24024b382a6 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Sat, 15 Jan 2022 21:34:59 +0000 Subject: [PATCH 11/30] add timing_cache_enable as additional member of internal TensorRT provider options struct --- .../core/session/onnxruntime_cxx_api.h | 1 + .../core/session/onnxruntime_cxx_inline.h | 5 +++++ onnxruntime/core/session/provider_bridge_ort.cc | 17 ++++++++++++++++- onnxruntime/test/perftest/ort_test_session.cc | 17 ++++++++++++++--- 4 files changed, 36 insertions(+), 4 deletions(-) diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h index 12370aafa80d4..e9ee68b8032fe 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h @@ -351,6 +351,7 @@ struct SessionOptions : Base { SessionOptions& AppendExecutionProvider_ROCM(const OrtROCMProviderOptions& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_ROCM SessionOptions& AppendExecutionProvider_OpenVINO(const OrtOpenVINOProviderOptions& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_OpenVINO SessionOptions& AppendExecutionProvider_TensorRT(const OrtTensorRTProviderOptions& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_TensorRT + SessionOptions& AppendExecutionProvider_TensorRT_V2(const OrtTensorRTProviderOptionsV2& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_TensorRT SessionOptions& AppendExecutionProvider_MIGraphX(const OrtMIGraphXProviderOptions& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_MIGraphX SessionOptions& SetCustomCreateThreadFn(OrtCustomCreateThreadFn 
ort_custom_create_thread_fn); ///< Wraps OrtApi::SessionOptionsSetCustomCreateThreadFn diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h index d281bb5542797..063acb1702a84 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h @@ -518,6 +518,11 @@ inline SessionOptions& SessionOptions::AppendExecutionProvider_TensorRT(const Or return *this; } +inline SessionOptions& SessionOptions::AppendExecutionProvider_TensorRT_V2(const OrtTensorRTProviderOptionsV2& provider_options) { + ThrowOnError(GetApi().SessionOptionsAppendExecutionProvider_TensorRT_V2(p_, &provider_options)); + return *this; +} + inline SessionOptions& SessionOptions::AppendExecutionProvider_MIGraphX(const OrtMIGraphXProviderOptions& provider_options) { ThrowOnError(GetApi().SessionOptionsAppendExecutionProvider_MIGraphX(p_, &provider_options)); return *this; diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 9ec828d28cd01..7088df707a2af 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1191,6 +1191,13 @@ std::shared_ptr CreateExecutionProviderFactory_Tensor return nullptr; } +std::shared_ptr CreateExecutionProviderFactory_Tensorrt(const OrtTensorRTProviderOptionsV2* provider_options) { + if (auto* provider = s_library_tensorrt.Get()) + return provider->CreateExecutionProviderFactory(provider_options); + + return nullptr; +} + std::shared_ptr CreateExecutionProviderFactory_MIGraphX(const OrtMIGraphXProviderOptions* provider_options) { if (auto* provider = s_library_migraphx.Get()) return provider->CreateExecutionProviderFactory(provider_options); @@ -1470,7 +1477,15 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_ROCM, _In_ Or } ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT_V2, _In_ OrtSessionOptions* options, _In_ const OrtTensorRTProviderOptionsV2* tensorrt_options) { - return OrtApis::SessionOptionsAppendExecutionProvider_TensorRT(options, reinterpret_cast(tensorrt_options)); + API_IMPL_BEGIN + auto factory = onnxruntime::CreateExecutionProviderFactory_Tensorrt(tensorrt_options); + if (!factory) { + return OrtApis::CreateStatus(ORT_FAIL, "OrtSessionOptionsAppendExecutionProvider_TensorRT: Failed to load shared library"); + } + + options->provider_factories.push_back(factory); + return nullptr; + API_IMPL_END } ORT_API_STATUS_IMPL(OrtApis::CreateTensorRTProviderOptions, _Outptr_ OrtTensorRTProviderOptionsV2** out) { diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index 551709ea621cd..866494d992006 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -1,6 +1,7 @@ #include "ort_test_session.h" #include #include "core/session/onnxruntime_session_options_config_keys.h" +#include "core/providers/tensorrt/tensorrt_provider_options.h" #include #include "providers.h" #include "TestCase.h" @@ -74,6 +75,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device bool trt_engine_decryption_enable = false; std::string trt_engine_decryption_lib_path = ""; bool trt_force_sequential_engine_build = false; + bool trt_timing_cache_enable = false; #ifdef _MSC_VER std::string ov_string = ToMBString(performance_test_config.run_config.ep_runtime_config_string); @@ 
-205,11 +207,19 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device } else { ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_force_sequential_engine_build' should be a boolean i.e. true or false. Default value is false.\n"); } + } else if (key == "trt_timing_cache_enable") { + if (value == "true" || value == "True") { + trt_timing_cache_enable = true; + } else if (value == "false" || value == "False") { + trt_timing_cache_enable = false; + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_timing_cache_enable' should be a boolean i.e. true or false. Default value is false.\n"); + } } else { - ORT_THROW("[ERROR] [TensorRT] wrong key type entered. Choose from the following runtime key options that are available for TensorRT. ['device_id', 'trt_max_partition_iterations', 'trt_min_subgraph_size', 'trt_max_workspace_size', 'trt_fp16_enable', 'trt_int8_enable', 'trt_int8_calibration_table_name', 'trt_int8_use_native_calibration_table', 'trt_dla_enable', 'trt_dla_core', 'trt_dump_subgraphs', 'trt_engine_cache_enable', 'trt_engine_cache_path', 'trt_engine_decryption_enable', 'trt_engine_decryption_lib_path', 'trt_force_sequential_engine_build'] \n"); + ORT_THROW("[ERROR] [TensorRT] wrong key type entered. Choose from the following runtime key options that are available for TensorRT. ['device_id', 'trt_max_partition_iterations', 'trt_min_subgraph_size', 'trt_max_workspace_size', 'trt_fp16_enable', 'trt_int8_enable', 'trt_int8_calibration_table_name', 'trt_int8_use_native_calibration_table', 'trt_dla_enable', 'trt_dla_core', 'trt_dump_subgraphs', 'trt_engine_cache_enable', 'trt_engine_cache_path', 'trt_engine_decryption_enable', 'trt_engine_decryption_lib_path', 'trt_force_sequential_engine_build', 'trt_timing_cache_enable'] \n"); } } - OrtTensorRTProviderOptions tensorrt_options; + OrtTensorRTProviderOptionsV2 tensorrt_options; tensorrt_options.device_id = device_id; tensorrt_options.has_user_compute_stream = 0; tensorrt_options.user_compute_stream = nullptr; @@ -228,7 +238,8 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device tensorrt_options.trt_engine_decryption_enable = trt_engine_decryption_enable; tensorrt_options.trt_engine_decryption_lib_path = trt_engine_decryption_lib_path.c_str(); tensorrt_options.trt_force_sequential_engine_build = trt_force_sequential_engine_build; - session_options.AppendExecutionProvider_TensorRT(tensorrt_options); + tensorrt_options.trt_timing_cache_enable = trt_timing_cache_enable; + session_options.AppendExecutionProvider_TensorRT_V2(tensorrt_options); OrtCUDAProviderOptions cuda_options; cuda_options.device_id=device_id; From b707c65d43021eb7a41885abba59844f6bba6a69 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Mon, 17 Jan 2022 21:00:33 +0000 Subject: [PATCH 12/30] fix warning --- .../core/providers/tensorrt/tensorrt_provider_factory.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc index 80772436c9e79..6e8f8be6f5cd7 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc @@ -70,7 +70,7 @@ struct Tensorrt_Provider : Provider { info.engine_decryption_enable = options.trt_engine_decryption_enable != 0; info.engine_decryption_lib_path = options.trt_engine_decryption_lib_path == nullptr ? 
"" : options.trt_engine_decryption_lib_path; info.force_sequential_engine_build = options.trt_force_sequential_engine_build != 0; - info.timing_cache_enable = options.trt_timing_cache_enable; + info.timing_cache_enable = options.trt_timing_cache_enable != 0; return std::make_shared(info); } From 48ecbeb563b17b995229545ce913f60221d8d156 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 28 Jan 2022 23:23:55 +0000 Subject: [PATCH 13/30] enable trt timing cache for model tests --- onnxruntime/test/providers/cpu/model_tests.cc | 20 +++++++++++++------ onnxruntime/test/util/default_providers.cc | 10 ++++++++++ .../test/util/include/default_providers.h | 1 + 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc index fbdb421e5fa46..59af6ea232522 100644 --- a/onnxruntime/test/providers/cpu/model_tests.cc +++ b/onnxruntime/test/providers/cpu/model_tests.cc @@ -18,6 +18,7 @@ #include "test/onnx/heap_buffer.h" #include "test/onnx/onnx_model_info.h" #include "test/onnx/callback.h" +#include "core/providers/tensorrt/tensorrt_provider_options.h" extern std::unique_ptr ort_env; @@ -587,15 +588,14 @@ TEST_P(ModelTest, Run) { } else if (provider_name == "nuphar") { ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(DefaultNupharExecutionProvider())); } else if (provider_name == "tensorrt") { - if (test_case_name.find(ORT_TSTR("FLOAT16")) != std::string::npos) { - OrtTensorRTProviderOptions params{ + OrtTensorRTProviderOptionsV2 params{ 0, 0, nullptr, 1000, 1, 1 << 30, - 1, // enable fp16 + 0, 0, nullptr, 0, @@ -603,13 +603,21 @@ TEST_P(ModelTest, Run) { 0, 0, 0, - nullptr, +#ifdef _WIN32 + "C:\\local\\trt_timing_cache", // directory where timing caches locate in CI Windows image +#else + "/data/trt_timing_cache", // directory where timing caches locate in CI Linux image +#endif 0, nullptr, - 0}; + 0, + 1 // enable trt timing cache to reduce CI testing time for trt ep + }; + if (test_case_name.find(ORT_TSTR("FLOAT16")) != std::string::npos) { + params.trt_fp16_enable = 1; ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(TensorrtExecutionProviderWithOptions(¶ms))); } else { - ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(DefaultTensorrtExecutionProvider())); + ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(DefaultTensorrtExecutionProvider(¶ms))); } ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(DefaultCudaExecutionProvider())); } else if (provider_name == "migraphx") { diff --git a/onnxruntime/test/util/default_providers.cc b/onnxruntime/test/util/default_providers.cc index 209d4244229fc..5e0975d667fb9 100644 --- a/onnxruntime/test/util/default_providers.cc +++ b/onnxruntime/test/util/default_providers.cc @@ -54,6 +54,16 @@ std::unique_ptr TensorrtExecutionProviderWithOptions(const O return nullptr; } +std::unique_ptr TensorrtExecutionProviderWithOptions(const OrtTensorRTProviderOptionsV2* params) { +#ifdef USE_TENSORRT + if (auto factory = CreateExecutionProviderFactory_Tensorrt(params)) + return factory->CreateProvider(); +#else + ORT_UNUSED_PARAMETER(params); +#endif + return nullptr; +} + std::unique_ptr DefaultMIGraphXExecutionProvider() { #ifdef USE_MIGRAPHX OrtMIGraphXProviderOptions params{ diff --git a/onnxruntime/test/util/include/default_providers.h b/onnxruntime/test/util/include/default_providers.h index 6fa50c61cdefa..04f25d1990126 100644 --- a/onnxruntime/test/util/include/default_providers.h +++ 
b/onnxruntime/test/util/include/default_providers.h @@ -24,6 +24,7 @@ std::shared_ptr CreateExecutionProviderFactory_OpenVI std::shared_ptr CreateExecutionProviderFactory_Rknpu(); std::shared_ptr CreateExecutionProviderFactory_Rocm(const OrtROCMProviderOptions* provider_options); std::shared_ptr CreateExecutionProviderFactory_Tensorrt(const OrtTensorRTProviderOptions* params); +std::shared_ptr CreateExecutionProviderFactory_Tensorrt(const OrtTensorRTProviderOptionsV2* params); // EP for internal testing std::shared_ptr CreateExecutionProviderFactory_InternalTesting(const std::unordered_set& supported_ops); From 9dc0d162e45f6ac4b850606672ddd356d1ff00a6 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Sat, 29 Jan 2022 18:05:47 +0000 Subject: [PATCH 14/30] enable timing cache for model tests --- .../tensorrt/tensorrt_execution_provider.cc | 17 +++++++++-------- onnxruntime/test/providers/cpu/model_tests.cc | 2 +- .../test/util/include/default_providers.h | 1 + 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index f14b5b418ed57..0db5a9eb17825 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -457,7 +457,8 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv } dump_subgraphs_ = info.dump_subgraphs; engine_cache_enable_ = info.engine_cache_enable; - if (engine_cache_enable_ || int8_enable_) { + timing_cache_enable_ = info.timing_cache_enable; + if (engine_cache_enable_ || int8_enable_ || timing_cache_enable_) { cache_path_ = info.engine_cache_path; } engine_decryption_enable_ = info.engine_decryption_enable; @@ -465,7 +466,6 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv engine_decryption_lib_path_ = info.engine_decryption_lib_path; } force_sequential_engine_build_ = info.force_sequential_engine_build; - timing_cache_enable_ = info.timing_cache_enable; } else { const std::string max_partition_iterations_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kMaxPartitionIterations); if (!max_partition_iterations_env.empty()) { @@ -528,7 +528,12 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv engine_cache_enable_ = (std::stoi(engine_cache_enable_env) == 0 ? false : true); } - if (engine_cache_enable_ || int8_enable_) { + const std::string timing_cache_enable_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kTimingCacheEnable); + if (!timing_cache_enable_env.empty()) { + timing_cache_enable_ = (std::stoi(timing_cache_enable_env) == 0 ? false : true); + } + + if (engine_cache_enable_ || int8_enable_ || timing_cache_enable_) { const std::string engine_cache_path = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kEngineCachePath); cache_path_ = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kCachePath); if (!engine_cache_path.empty() && cache_path_.empty()) { @@ -551,10 +556,6 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv force_sequential_engine_build_ = (std::stoi(force_sequential_engine_build_env) == 0 ? false : true); } - const std::string timing_cache_enable_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kTimingCacheEnable); - if (!timing_cache_enable_env.empty()) { - timing_cache_enable_ = (std::stoi(timing_cache_enable_env) == 0 ? 
false : true); - } } // Validate setting @@ -575,7 +576,7 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv dla_core_ = 0; } - if (engine_cache_enable_ || int8_enable_) { + if (engine_cache_enable_ || int8_enable_ || timing_cache_enable_) { if (!cache_path_.empty() && !fs::is_directory(cache_path_)) { if (!fs::create_directory(cache_path_)) { throw std::runtime_error("Failed to create directory " + cache_path_); diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc index 88db8f2cf8849..399cb65829549 100644 --- a/onnxruntime/test/providers/cpu/model_tests.cc +++ b/onnxruntime/test/providers/cpu/model_tests.cc @@ -620,7 +620,7 @@ TEST_P(ModelTest, Run) { params.trt_fp16_enable = 1; ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(TensorrtExecutionProviderWithOptions(¶ms))); } else { - ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(DefaultTensorrtExecutionProvider(¶ms))); + ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(TensorrtExecutionProviderWithOptions(¶ms))); } ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(DefaultCudaExecutionProvider())); } else if (provider_name == "migraphx") { diff --git a/onnxruntime/test/util/include/default_providers.h b/onnxruntime/test/util/include/default_providers.h index 04f25d1990126..980129e95c7c4 100644 --- a/onnxruntime/test/util/include/default_providers.h +++ b/onnxruntime/test/util/include/default_providers.h @@ -39,6 +39,7 @@ std::unique_ptr DefaultNupharExecutionProvider(bool allow_un //std::unique_ptr DefaultStvmExecutionProvider(); std::unique_ptr DefaultTensorrtExecutionProvider(); std::unique_ptr TensorrtExecutionProviderWithOptions(const OrtTensorRTProviderOptions* params); +std::unique_ptr TensorrtExecutionProviderWithOptions(const OrtTensorRTProviderOptionsV2* params); std::unique_ptr DefaultMIGraphXExecutionProvider(); std::unique_ptr MIGraphXExecutionProviderWithOptions(const OrtMIGraphXProviderOptions* params); std::unique_ptr DefaultOpenVINOExecutionProvider(); From 0f378b1353736ecf76baa4fa7fd034d284d3887c Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Sat, 29 Jan 2022 19:05:22 +0000 Subject: [PATCH 15/30] change pool --- .../github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml index ad6a5d2a4d555..e2c65b5121548 100644 --- a/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml @@ -1,6 +1,6 @@ jobs: - job: 'build' - pool: 'onnxruntime-tensorrt8-winbuild' + pool: 'onnxruntime-gpu-tensorrt8-winbuild ' variables: OrtPackageId: 'Microsoft.ML.OnnxRuntime' MsbuildArguments: '-detailedsummary -maxcpucount -consoleloggerparameters:PerformanceSummary' From 72b76457b4a9ffa4dac25f11a71a2f54ab70b831 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Sat, 29 Jan 2022 21:19:55 +0000 Subject: [PATCH 16/30] change back previous pool --- .../github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml index e2c65b5121548..323c8fa4e6e87 100644 --- a/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml +++ 
b/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml @@ -1,6 +1,6 @@ jobs: - job: 'build' - pool: 'onnxruntime-gpu-tensorrt8-winbuild ' + pool: 'onnxruntime-tensorrt8-winbuild ' variables: OrtPackageId: 'Microsoft.ML.OnnxRuntime' MsbuildArguments: '-detailedsummary -maxcpucount -consoleloggerparameters:PerformanceSummary' From 2c5ac28636af26bb1f2ed084e86bc422b7c8b5e5 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Mon, 31 Jan 2022 22:44:38 +0000 Subject: [PATCH 17/30] change path of trt_timing_cache --- onnxruntime/test/providers/cpu/model_tests.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc index 399cb65829549..d455139369ed3 100644 --- a/onnxruntime/test/providers/cpu/model_tests.cc +++ b/onnxruntime/test/providers/cpu/model_tests.cc @@ -607,9 +607,9 @@ TEST_P(ModelTest, Run) { 0, 0, #ifdef _WIN32 - "C:\\local\\trt_timing_cache", // directory where timing caches locate in CI Windows image + "C:\\local\\models\\trt_timing_cache", // directory where timing caches locate in CI Windows image #else - "/data/trt_timing_cache", // directory where timing caches locate in CI Linux image + "/data/models/trt_timing_cache", // directory where timing caches locate in CI Linux image #endif 0, nullptr, From 93e61f0906261bc47409366d5d6ca94cdce44232 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 4 Feb 2022 22:19:02 +0000 Subject: [PATCH 18/30] refactor code --- onnxruntime/test/providers/cpu/model_tests.cc | 11 +++-------- .../azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml | 2 +- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc index d455139369ed3..b7890e1b8bff1 100644 --- a/onnxruntime/test/providers/cpu/model_tests.cc +++ b/onnxruntime/test/providers/cpu/model_tests.cc @@ -606,21 +606,16 @@ TEST_P(ModelTest, Run) { 0, 0, 0, -#ifdef _WIN32 - "C:\\local\\models\\trt_timing_cache", // directory where timing caches locate in CI Windows image -#else - "/data/models/trt_timing_cache", // directory where timing caches locate in CI Linux image -#endif + nullptr, 0, nullptr, 0, - 1 // enable trt timing cache to reduce CI testing time for trt ep - }; + 0}; if (test_case_name.find(ORT_TSTR("FLOAT16")) != std::string::npos) { params.trt_fp16_enable = 1; ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(TensorrtExecutionProviderWithOptions(¶ms))); } else { - ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(TensorrtExecutionProviderWithOptions(¶ms))); + ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(DefaultTensorrtExecutionProvider())); } ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(DefaultCudaExecutionProvider())); } else if (provider_name == "migraphx") { diff --git a/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml index 323c8fa4e6e87..ad6a5d2a4d555 100644 --- a/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml @@ -1,6 +1,6 @@ jobs: - job: 'build' - pool: 'onnxruntime-tensorrt8-winbuild ' + pool: 'onnxruntime-tensorrt8-winbuild' variables: OrtPackageId: 'Microsoft.ML.OnnxRuntime' MsbuildArguments: '-detailedsummary -maxcpucount -consoleloggerparameters:PerformanceSummary' From c9813c2185ece8d8b5ca9d7535fe24a98dfc0f18 Mon Sep 17 00:00:00 2001 
From: Chi Lo Date: Fri, 4 Feb 2022 22:19:52 +0000 Subject: [PATCH 19/30] Add test cases for timing cache --- .../providers/tensorrt/tensorrt_basic_test.cc | 265 ++++++++++++++++++ 1 file changed, 265 insertions(+) diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index c24401a1b89a3..2085d1e3230d9 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ -7,6 +7,11 @@ #include "gtest/gtest.h" #include "test/util/include/default_providers.h" #include "test/util/include/scoped_env_vars.h" +#include "core/providers/tensorrt/tensorrt_provider_options.h" +#include +#include +#include +namespace fs = std::filesystem; using namespace std; using namespace ONNX_NAMESPACE; @@ -15,6 +20,8 @@ using namespace ::onnxruntime::logging; namespace onnxruntime { namespace test { +class TensorrtExecutionProviderCacheTest: public testing::TestWithParam> {}; + template void VerifyOutputs(const std::vector& fetches, const std::vector& expected_dims, const std::vector& expected_values) { @@ -26,6 +33,264 @@ void VerifyOutputs(const std::vector& fetches, const std::vector path, std::basic_string file_extension) { + for (const auto & entry : fs::directory_iterator(path)) { + if (file_extension.compare(fs::path(entry).extension()) == 0) { + return true; + } + } + return false; +} + +void RemoveTensorRTCache(std::basic_string path, std::basic_string file_extension) { + for (const auto & entry : fs::directory_iterator(path)) { + if (file_extension.compare(fs::path(entry).extension()) == 0) { + fs::remove(entry); + } + } +} + +void CreateBaseModel(std::basic_string model_name, std::basic_string graph_name, bool is_dynamic_input_shape, std::vector dims) { + onnxruntime::Model model(graph_name, false, DefaultLoggingManager().DefaultLogger()); + auto& graph = model.MainGraph(); + std::vector inputs; + std::vector outputs; + + // FLOAT tensor + ONNX_NAMESPACE::TypeProto float_tensor; + float_tensor.mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT); + + for (auto dim: dims) { + float_tensor.mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_value(dim); + } + + if (is_dynamic_input_shape) { + float_tensor.mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_param("sym1"); + float_tensor.mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_param("sym2"); + } + + auto& input_arg_1 = graph.GetOrCreateNodeArg("X", &float_tensor); + auto& input_arg_2 = graph.GetOrCreateNodeArg("Y", &float_tensor); + inputs.push_back(&input_arg_1); + inputs.push_back(&input_arg_2); + auto& output_arg = graph.GetOrCreateNodeArg("node_1_out_1", &float_tensor); + outputs.push_back(&output_arg); + graph.AddNode("node_1", "Add", "node 1.", inputs, outputs); + + auto& input_arg_3 = graph.GetOrCreateNodeArg("Z", &float_tensor); + inputs.clear(); + inputs.push_back(&output_arg); + inputs.push_back(&input_arg_3); + auto& output_arg_2 = graph.GetOrCreateNodeArg("M", &float_tensor); + outputs.clear(); + outputs.push_back(&output_arg_2); + graph.AddNode("node_2", "Add", "node 2.", inputs, outputs); + + auto status = graph.Resolve(); + ASSERT_TRUE(status.IsOK()); + status = onnxruntime::Model::Save(model, model_name); +} + +TEST_P(TensorrtExecutionProviderCacheTest, Run) { + // GetParam() consists of two main parameters: + // - cache type (engine cache, profile cache and timing cache) + // - input type (dynamic input shape or static input shape). 
+ // Note: it might have other paramters used for specific situation + std::basic_string param = GetParam(); + std::basic_string input_type = "static"; + std::basic_string engine_info = "enginecache_disable"; // for timigh cache case only + size_t pos = param.find(ORT_TSTR("_")); + ASSERT_NE(pos, std::string::npos); + std::basic_string cache_type = ToUTF8String(param.substr(0, pos)); + if (cache_type.compare("timing") == 0) { + std::basic_string suffix = param.substr(pos + 1); + size_t suffix_pos = suffix.find(ORT_TSTR("_")); + input_type = ToUTF8String(suffix.substr(0, suffix_pos)); + engine_info = suffix.substr(suffix_pos + 1); + } else { + input_type = param.substr(pos + 1); + } + + std::basic_string model_name = "trt_execution_provider_" + cache_type + "caching_test_" + input_type + ".onnx"; + std::vector dims; // static dims + if (input_type.compare("dynamic") == 0) { + dims.push_back(1); + CreateBaseModel(model_name, cache_type + "cachingtest", true, dims); // dynamic input shape + // dims is (1, sym1, sym2) + } + else { + dims.push_back(1); + dims.push_back(3); + dims.push_back(2); + CreateBaseModel(model_name, cache_type + "cachingtest", false, dims); // non-dynamic input shape + // dims is (1, 3, 2) + } + + SessionOptions so; + so.session_logid = "TensorrtExecutionProvider" + cache_type + "cacheTest"; + RunOptions run_options; + run_options.run_tag = so.session_logid; + InferenceSession session_object{so, GetEnvironment()}; + auto allocator_manager = session_object.GetAllocatorManager(); + auto cuda_provider = DefaultCudaExecutionProvider(); + cuda_provider->RegisterAllocator(allocator_manager); + auto cpu_allocator = cuda_provider->GetAllocator(0, OrtMemTypeCPU); + std::vector dims_mul_x = {1, 3, 2}; + std::vector values_mul_x = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; + OrtValue ml_value_x; + CreateMLValue(cpu_allocator, dims_mul_x, values_mul_x, &ml_value_x); + OrtValue ml_value_y; + CreateMLValue(cpu_allocator, dims_mul_x, values_mul_x, &ml_value_y); + OrtValue ml_value_z; + CreateMLValue(cpu_allocator, dims_mul_x, values_mul_x, &ml_value_z); + NameMLValMap feeds; + feeds.insert(std::make_pair("X", ml_value_x)); + feeds.insert(std::make_pair("Y", ml_value_y)); + feeds.insert(std::make_pair("Z", ml_value_z)); + + // prepare outputs + std::vector output_names; + output_names.push_back("M"); + std::vector fetches; + + // prepare expected inputs and outputs + std::vector expected_dims_mul_m = {1, 3, 2}; + std::vector expected_values_mul_m = {3.0f, 6.0f, 9.0f, 12.0f, 15.0f, 18.0f}; + + OrtTensorRTProviderOptionsV2 params{ + 0, + 0, + nullptr, + 1000, + 1, + 1 << 30, + 0, + 0, + nullptr, + 0, + 0, + 0, + 0, + 0, + nullptr, + 0, + nullptr, + 0, + 0}; + + if (cache_type.compare("timing") == 0) { + + // create ort session + params.trt_timing_cache_enable = 1; + if (engine_info.compare("enginecache_enable") == 0) + params.trt_engine_cache_enable = 1; + std::unique_ptr execution_provider = TensorrtExecutionProviderWithOptions(¶ms); + EXPECT_TRUE(session_object.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); + auto status = session_object.Load(model_name); + ASSERT_TRUE(status.IsOK()); + status = session_object.Initialize(); + ASSERT_TRUE(status.IsOK()); + + // run inference + // timing cache should be created under the situation of non-dynamic/dynamic shape input and engine cache enabled/disabled + status = session_object.Run(run_options, feeds, output_names, &fetches); + ASSERT_TRUE(status.IsOK()); + VerifyOutputs(fetches, expected_dims_mul_m, expected_values_mul_m); + 
ASSERT_TRUE(IsTensorRTCacheExisted("./", ".timing")); + RemoveTensorRTCache("./", ".timing"); + + // run inference + // timing cache shoud not be used or created since input shape is not changed and engine won't be re-built + status = session_object.Run(run_options, feeds, output_names, &fetches); + ASSERT_TRUE(status.IsOK()); + VerifyOutputs(fetches, expected_dims_mul_m, expected_values_mul_m); + ASSERT_TRUE(!IsTensorRTCacheExisted("./", ".timing")); + + // create another ort session to test + InferenceSession session_object_2{so, GetEnvironment()}; + execution_provider = TensorrtExecutionProviderWithOptions(¶ms); + EXPECT_TRUE(session_object_2.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); + status = session_object_2.Load(model_name); + ASSERT_TRUE(status.IsOK()); + status = session_object_2.Initialize(); + ASSERT_TRUE(status.IsOK()); + + if (engine_info.compare("enginecache_enable") == 0) { + // engine cache is enabled + + // run inference + // timing cache shoud not be created since engine cache is existed and will be used + status = session_object_2.Run(run_options, feeds, output_names, &fetches); + ASSERT_TRUE(status.IsOK()); + VerifyOutputs(fetches, expected_dims_mul_m, expected_values_mul_m); + ASSERT_TRUE(!IsTensorRTCacheExisted("./", ".timing")); + } else { + // engine cache is not enabled + + // run inference + // timing cache shoud be created + status = session_object_2.Run(run_options, feeds, output_names, &fetches); + ASSERT_TRUE(status.IsOK()); + VerifyOutputs(fetches, expected_dims_mul_m, expected_values_mul_m); + ASSERT_TRUE(IsTensorRTCacheExisted("./", ".timing")); + RemoveTensorRTCache("./", ".timing"); + } + + if (input_type.compare("dynamic") == 0) { + // dynamic input shape + + // inference run with input shape {1, 1, 6} + // timing cache will be created + // TRT engine and profile will be updated + dims_mul_x = {1, 1, 6}; + CreateMLValue(cpu_allocator, dims_mul_x, values_mul_x, &ml_value_x); + CreateMLValue(cpu_allocator, dims_mul_x, values_mul_x, &ml_value_y); + CreateMLValue(cpu_allocator, dims_mul_x, values_mul_x, &ml_value_z); + feeds.clear(); + feeds.insert(std::make_pair("X", ml_value_x)); + feeds.insert(std::make_pair("Y", ml_value_y)); + feeds.insert(std::make_pair("Z", ml_value_z)); + + status = session_object_2.Run(run_options, feeds, output_names, &fetches); + ASSERT_TRUE(status.IsOK()); + VerifyOutputs(fetches, expected_dims_mul_m, expected_values_mul_m); + ASSERT_TRUE(IsTensorRTCacheExisted("./", ".timing")); + } + + // clean up caches for another session + RemoveTensorRTCache("./", ".timing"); + RemoveTensorRTCache("./", ".profile"); + RemoveTensorRTCache("./", ".engine"); + + } else if (cache_type.compare("engine") == 0) { + // #TODO + } else if (cache_type.compare("profile") == 0) { + // #TODO + } +} + +auto ExpandModelName = [](const ::testing::TestParamInfo& info) { + // use info.param here to generate the test suffix + std::basic_string name = info.param; +#ifdef _WIN32 + // Note: The return value of INSTANTIATE_TEST_SUITE_P accpets std::basic_string. + // Need conversion of wchar_t to char. 
+ return std::wstring_convert>().to_bytes(name); +#else + return name; +#endif +}; + +// timing_dynamic_enginecache_enable: timing cache enabled, dynamic input shape and engine cache enable +// timing_dynamic_enginecache_disable: timing cache enabled, dynamic input shape and engine cache disable +// timing_static_enginecache_enable: timing cache enabled, static input shape and engine cache enable +// timing_static_enginecache_disable: timing cache enabled, static input shape and engine cache disable +INSTANTIATE_TEST_SUITE_P(TensorrtExecutionProviderCacheTests, TensorrtExecutionProviderCacheTest, testing::Values("timing_dynamic_enginecache_enable", + "timing_dynamic_enginecache_disable", + "timing_static_enginecache_enable", + "timing_static_enginecache_disable"), + ExpandModelName); + TEST(TensorrtExecutionProviderTest, EngineCachingTest) { ScopedEnvironmentVariables scoped_env_vars{EnvVarMap{ {"ORT_TENSORRT_ENGINE_CACHE_ENABLE", {"1"}}, From e69723e82f1c2a667d6390be2cf901fec5a831da Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Fri, 4 Feb 2022 23:07:18 +0000 Subject: [PATCH 20/30] fix bug --- onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index 2085d1e3230d9..4705821f1444a 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ -269,7 +269,7 @@ TEST_P(TensorrtExecutionProviderCacheTest, Run) { } } -auto ExpandModelName = [](const ::testing::TestParamInfo& info) { +auto AddTestName = [](const ::testing::TestParamInfo& info) { // use info.param here to generate the test suffix std::basic_string name = info.param; #ifdef _WIN32 @@ -289,7 +289,7 @@ INSTANTIATE_TEST_SUITE_P(TensorrtExecutionProviderCacheTests, TensorrtExecutionP "timing_dynamic_enginecache_disable", "timing_static_enginecache_enable", "timing_static_enginecache_disable"), - ExpandModelName); + AddTestName); TEST(TensorrtExecutionProviderTest, EngineCachingTest) { ScopedEnvironmentVariables scoped_env_vars{EnvVarMap{ From c371a6dc5e065f000e8bf3c8d6df15210a4992bd Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Sat, 5 Feb 2022 20:35:54 +0000 Subject: [PATCH 21/30] fix bug for CI --- .../providers/tensorrt/tensorrt_basic_test.cc | 64 +++++++++---------- 1 file changed, 29 insertions(+), 35 deletions(-) diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index 4705821f1444a..c9ec5608a8541 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ -20,7 +20,7 @@ using namespace ::onnxruntime::logging; namespace onnxruntime { namespace test { -class TensorrtExecutionProviderCacheTest: public testing::TestWithParam> {}; +class TensorrtExecutionProviderCacheTest: public testing::TestWithParam {}; template void VerifyOutputs(const std::vector& fetches, const std::vector& expected_dims, @@ -33,24 +33,24 @@ void VerifyOutputs(const std::vector& fetches, const std::vector path, std::basic_string file_extension) { +bool IsTensorRTCacheExisted(std::string path, std::string file_extension) { for (const auto & entry : fs::directory_iterator(path)) { - if (file_extension.compare(fs::path(entry).extension()) == 0) { + if (fs::path(file_extension) == fs::path(entry).extension()) { return true; } } return false; } 
-void RemoveTensorRTCache(std::basic_string path, std::basic_string file_extension) { +void RemoveTensorRTCache(std::string path, std::string file_extension) { for (const auto & entry : fs::directory_iterator(path)) { - if (file_extension.compare(fs::path(entry).extension()) == 0) { + if (fs::path(file_extension) == fs::path(entry).extension()) { fs::remove(entry); } } } -void CreateBaseModel(std::basic_string model_name, std::basic_string graph_name, bool is_dynamic_input_shape, std::vector dims) { +void CreateBaseModel(std::string model_name, std::string graph_name, bool is_dynamic_input_shape, std::vector dims) { onnxruntime::Model model(graph_name, false, DefaultLoggingManager().DefaultLogger()); auto& graph = model.MainGraph(); std::vector inputs; @@ -92,26 +92,24 @@ void CreateBaseModel(std::basic_string model_name, std::basic_string< } TEST_P(TensorrtExecutionProviderCacheTest, Run) { - // GetParam() consists of two main parameters: - // - cache type (engine cache, profile cache and timing cache) - // - input type (dynamic input shape or static input shape). - // Note: it might have other paramters used for specific situation - std::basic_string param = GetParam(); - std::basic_string input_type = "static"; - std::basic_string engine_info = "enginecache_disable"; // for timigh cache case only - size_t pos = param.find(ORT_TSTR("_")); + // GetParam() returns the parameter of following format: + // ##cache type##_##input shape type##_##other information if needed## + std::string param = GetParam(); + std::string input_type = "static"; + std::string engine_info = "enginecache_disable"; // for timigh cache case only + size_t pos = param.find("_"); ASSERT_NE(pos, std::string::npos); - std::basic_string cache_type = ToUTF8String(param.substr(0, pos)); + std::string cache_type = ToUTF8String(param.substr(0, pos)); if (cache_type.compare("timing") == 0) { - std::basic_string suffix = param.substr(pos + 1); - size_t suffix_pos = suffix.find(ORT_TSTR("_")); + std::string suffix = param.substr(pos + 1); + size_t suffix_pos = suffix.find("_"); input_type = ToUTF8String(suffix.substr(0, suffix_pos)); engine_info = suffix.substr(suffix_pos + 1); } else { input_type = param.substr(pos + 1); } - std::basic_string model_name = "trt_execution_provider_" + cache_type + "caching_test_" + input_type + ".onnx"; + std::string model_name = "trt_execution_provider_" + cache_type + "caching_test_" + input_type + ".onnx"; std::vector dims; // static dims if (input_type.compare("dynamic") == 0) { dims.push_back(1); @@ -269,27 +267,23 @@ TEST_P(TensorrtExecutionProviderCacheTest, Run) { } } -auto AddTestName = [](const ::testing::TestParamInfo& info) { - // use info.param here to generate the test suffix - std::basic_string name = info.param; -#ifdef _WIN32 - // Note: The return value of INSTANTIATE_TEST_SUITE_P accpets std::basic_string. - // Need conversion of wchar_t to char. - return std::wstring_convert>().to_bytes(name); -#else - return name; -#endif -}; - -// timing_dynamic_enginecache_enable: timing cache enabled, dynamic input shape and engine cache enable -// timing_dynamic_enginecache_disable: timing cache enabled, dynamic input shape and engine cache disable -// timing_static_enginecache_enable: timing cache enabled, static input shape and engine cache enable -// timing_static_enginecache_disable: timing cache enabled, static input shape and engine cache disable +// The TensorrtExecutionProviderCacheTest aims to test the functionality of all the engine/profile/timing caches of ORT TRT. 
+// It uses value-parameterized test and the parameter in the test is a composite parameter which has following format: +// ##cache type##_##input shape type##_##additional provider options if needed## +// - cache type (could be engine cache, profile cache or timing cache) +// - input shape type (could be dynamic input shape or static input shape). +// +// +// We have following test parameters: +// - timing_dynamic_enginecache_enable: timing cache enabled, dynamic input shape and engine cache enable +// - timing_dynamic_enginecache_disable: timing cache enabled, dynamic input shape and engine cache disable +// - timing_static_enginecache_enable: timing cache enabled, static input shape and engine cache enable +// - timing_static_enginecache_disable: timing cache enabled, static input shape and engine cache disable INSTANTIATE_TEST_SUITE_P(TensorrtExecutionProviderCacheTests, TensorrtExecutionProviderCacheTest, testing::Values("timing_dynamic_enginecache_enable", "timing_dynamic_enginecache_disable", "timing_static_enginecache_enable", "timing_static_enginecache_disable"), - AddTestName); + [](const ::testing::TestParamInfo& info) {return info.param;}); TEST(TensorrtExecutionProviderTest, EngineCachingTest) { ScopedEnvironmentVariables scoped_env_vars{EnvVarMap{ From e513740bbb0a61425a6e0d01b288a10278a64203 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Sat, 5 Feb 2022 20:57:49 +0000 Subject: [PATCH 22/30] fix bug --- onnxruntime/test/providers/cpu/model_tests.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc index d557df31357c0..0a72deeb76d48 100644 --- a/onnxruntime/test/providers/cpu/model_tests.cc +++ b/onnxruntime/test/providers/cpu/model_tests.cc @@ -22,7 +22,6 @@ #include "test/onnx/heap_buffer.h" #include "test/onnx/onnx_model_info.h" #include "test/onnx/callback.h" -#include "core/providers/tensorrt/tensorrt_provider_options.h" extern std::unique_ptr ort_env; @@ -600,7 +599,7 @@ TEST_P(ModelTest, Run) { 1000, 1, 1 << 30, - 0, + 1, // enable fp16 0, nullptr, 0, From 4a45c30d4c18cdfc5b9f3e679c0ba8c56c263099 Mon Sep 17 00:00:00 2001 From: Chi Lo Date: Sat, 5 Feb 2022 22:58:27 +0000 Subject: [PATCH 23/30] fix bug --- onnxruntime/python/onnxruntime_pybind_state.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index 56c59406bcd56..aa9e7df01bb9b 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -393,6 +393,7 @@ std::unique_ptr CreateExecutionProviderInstance( nullptr, 0, nullptr, + 0, 0}; for (auto option : it->second) { if (option.first == "device_id") { From e38556a26b018df4b241284a5414d1ed847f2cd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20M=C3=BCller?= Date: Wed, 8 Feb 2023 17:50:24 +0100 Subject: [PATCH 24/30] timing cache test --- .../external/onnxruntime_external_deps.cmake | 2 +- .../tensorrt/tensorrt_execution_provider.cc | 26 +- onnxruntime/test/providers/cpu/model_tests.cc | 2 +- .../providers/tensorrt/tensorrt_basic_test.cc | 325 ++++-------------- 4 files changed, 83 insertions(+), 272 deletions(-) diff --git a/cmake/external/onnxruntime_external_deps.cmake b/cmake/external/onnxruntime_external_deps.cmake index 61844b36aa4b3..01ae640748ef0 100644 --- a/cmake/external/onnxruntime_external_deps.cmake +++ b/cmake/external/onnxruntime_external_deps.cmake @@ -455,7 +455,7 @@ if 
(onnxruntime_USE_CUDA) list(APPEND onnxruntime_LINK_DIRS ${onnxruntime_CUDA_HOME}/x64/lib64) else() if(onnxruntime_CUDNN_HOME) - list(APPEND onnxruntime_LINK_DIRS ${onnxruntime_CUDNN_HOME}/lib64) + list(APPEND onnxruntime_LINK_DIRS ${onnxruntime_CUDNN_HOME}/lib ${onnxruntime_CUDNN_HOME}/lib64) endif() list(APPEND onnxruntime_LINK_DIRS ${onnxruntime_CUDA_HOME}/lib64) endif() diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index 9cbe018b3ffd4..82384d208bcc6 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -1483,20 +1483,20 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector(serializedModel->data()), engine_size); } serializedModel->destroy(); - LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Serialized " + engine_cache_path; + LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Serialized engine " + engine_cache_path; + } + // serialize and save timing cache + if (timing_cache_enable_) + { + auto timing_cache = trt_config->getTimingCache(); + std::unique_ptr timingCacheHostData{timing_cache->serialize()}; + if (timingCacheHostData == nullptr) { + return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, + "TensorRT EP could not serialize timing cache: " + timing_cache_path); + } + saveTimingCacheFile(timing_cache_path, timingCacheHostData.get()); + LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Serialized timing cache " + timing_cache_path; } - } - - // serialize and save timing cache - if (timing_cache_enable_) - { - auto timing_cache = trt_config->getTimingCache(); - std::unique_ptr timingCacheHostData{timing_cache->serialize()}; - if (timingCacheHostData == nullptr) { - return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, - "TensorRT EP could not serialize timing cache: " + timing_cache_path); - } - saveTimingCacheFile(timing_cache_path, timingCacheHostData.get()); } } diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc index 206f934a342f7..5adfcda2a38ea 100644 --- a/onnxruntime/test/providers/cpu/model_tests.cc +++ b/onnxruntime/test/providers/cpu/model_tests.cc @@ -687,7 +687,7 @@ TEST_P(ModelTest, Run) { if (test_case_name.find(ORT_TSTR("FLOAT16")) != std::string::npos) { OrtTensorRTProviderOptionsV2 params{0, 0, nullptr, 1000, 1, 1 << 30, 1, // enable fp16 - 0, nullptr, 0, 0, 0, 0, 0, nullptr, 0, nullptr, 0, 0, 0}; + 0, nullptr, 0, 0, 0, 0, 0, nullptr, 0, nullptr, 0, 0, 0, 0}; ASSERT_ORT_STATUS_OK(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT_V2(ortso, ¶ms)); } else { OrtTensorRTProviderOptionsV2* ep_option; diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index 1e8ffe33cd1b8..6c2b051db3541 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ -12,7 +12,7 @@ #include #include #include -namespace fs = std::filesystem; +#include using namespace std; using namespace ONNX_NAMESPACE; @@ -153,6 +153,7 @@ void RunWithOneSessionSingleThreadInference(std::string model_name, std::string nullptr, 0, 0, + 0, 0}; params.trt_engine_cache_enable = 1; @@ -224,6 +225,7 @@ void RunWithOneSessionMultiThreadsInference(std::string model_name, std::string nullptr, 0, 0, + 0, 0}; params.trt_engine_cache_enable = 1; @@ -251,259 +253,6 @@ void RunWithOneSessionMultiThreadsInference(std::string 
model_name, std::string th.join(); } - bool IsTensorRTCacheExisted(std::string path, std::string file_extension) { - for (const auto & entry : fs::directory_iterator(path)) { - if (fs::path(file_extension) == fs::path(entry).extension()) { - return true; - } - } - return false; - } - - void RemoveTensorRTCache(std::string path, std::string file_extension) { - for (const auto & entry : fs::directory_iterator(path)) { - if (fs::path(file_extension) == fs::path(entry).extension()) { - fs::remove(entry); - } - } - } - - void CreateBaseModel(std::string model_name, std::string graph_name, bool is_dynamic_input_shape, std::vector dims) { - onnxruntime::Model model(graph_name, false, DefaultLoggingManager().DefaultLogger()); - auto& graph = model.MainGraph(); - std::vector inputs; - std::vector outputs; - - // FLOAT tensor - ONNX_NAMESPACE::TypeProto float_tensor; - float_tensor.mutable_tensor_type()->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT); - - for (auto dim: dims) { - float_tensor.mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_value(dim); - } - - if (is_dynamic_input_shape) { - float_tensor.mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_param("sym1"); - float_tensor.mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_param("sym2"); - } - - auto& input_arg_1 = graph.GetOrCreateNodeArg("X", &float_tensor); - auto& input_arg_2 = graph.GetOrCreateNodeArg("Y", &float_tensor); - inputs.push_back(&input_arg_1); - inputs.push_back(&input_arg_2); - auto& output_arg = graph.GetOrCreateNodeArg("node_1_out_1", &float_tensor); - outputs.push_back(&output_arg); - graph.AddNode("node_1", "Add", "node 1.", inputs, outputs); - - auto& input_arg_3 = graph.GetOrCreateNodeArg("Z", &float_tensor); - inputs.clear(); - inputs.push_back(&output_arg); - inputs.push_back(&input_arg_3); - auto& output_arg_2 = graph.GetOrCreateNodeArg("M", &float_tensor); - outputs.clear(); - outputs.push_back(&output_arg_2); - graph.AddNode("node_2", "Add", "node 2.", inputs, outputs); - - auto status = graph.Resolve(); - ASSERT_TRUE(status.IsOK()); - status = onnxruntime::Model::Save(model, model_name); - } - - TEST_P(TensorrtExecutionProviderCacheTest, Run) { - // GetParam() returns the parameter of following format: - // ##cache type##_##input shape type##_##other information if needed## - std::string param = GetParam(); - std::string input_type = "static"; - std::string engine_info = "enginecache_disable"; // for timigh cache case only - size_t pos = param.find("_"); - ASSERT_NE(pos, std::string::npos); - std::string cache_type = ToUTF8String(param.substr(0, pos)); - if (cache_type.compare("timing") == 0) { - std::string suffix = param.substr(pos + 1); - size_t suffix_pos = suffix.find("_"); - input_type = ToUTF8String(suffix.substr(0, suffix_pos)); - engine_info = suffix.substr(suffix_pos + 1); -} else { - input_type = param.substr(pos + 1); -} - -std::string model_name = "trt_execution_provider_" + cache_type + "caching_test_" + input_type + ".onnx"; -std::vector dims; // static dims -if (input_type.compare("dynamic") == 0) { -dims.push_back(1); -CreateBaseModel(model_name, cache_type + "cachingtest", true, dims); // dynamic input shape -// dims is (1, sym1, sym2) -} -else { -dims.push_back(1); -dims.push_back(3); -dims.push_back(2); -CreateBaseModel(model_name, cache_type + "cachingtest", false, dims); // non-dynamic input shape -// dims is (1, 3, 2) -} - -SessionOptions so; -so.session_logid = "TensorrtExecutionProvider" + cache_type + "cacheTest"; -RunOptions run_options; 
-run_options.run_tag = so.session_logid; -InferenceSession session_object{so, GetEnvironment()}; -auto allocator_manager = session_object.GetAllocatorManager(); -auto cuda_provider = DefaultCudaExecutionProvider(); -cuda_provider->RegisterAllocator(allocator_manager); -auto cpu_allocator = cuda_provider->GetAllocator(0, OrtMemTypeCPU); -std::vector dims_mul_x = {1, 3, 2}; -std::vector values_mul_x = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; -OrtValue ml_value_x; -CreateMLValue(cpu_allocator, dims_mul_x, values_mul_x, &ml_value_x); -OrtValue ml_value_y; -CreateMLValue(cpu_allocator, dims_mul_x, values_mul_x, &ml_value_y); -OrtValue ml_value_z; -CreateMLValue(cpu_allocator, dims_mul_x, values_mul_x, &ml_value_z); -NameMLValMap feeds; -feeds.insert(std::make_pair("X", ml_value_x)); -feeds.insert(std::make_pair("Y", ml_value_y)); -feeds.insert(std::make_pair("Z", ml_value_z)); - -// prepare outputs -std::vector output_names; -output_names.push_back("M"); -std::vector fetches; - -// prepare expected inputs and outputs -std::vector expected_dims_mul_m = {1, 3, 2}; -std::vector expected_values_mul_m = {3.0f, 6.0f, 9.0f, 12.0f, 15.0f, 18.0f}; - -OrtTensorRTProviderOptionsV2 params{ - 0, - 0, - nullptr, - 1000, - 1, - 1 << 30, - 0, - 0, - nullptr, - 0, - 0, - 0, - 0, - 0, - nullptr, - 0, - nullptr, - 0, - 0}; - -if (cache_type.compare("timing") == 0) { - -// create ort session -params.trt_timing_cache_enable = 1; -if (engine_info.compare("enginecache_enable") == 0) -params.trt_engine_cache_enable = 1; -std::unique_ptr execution_provider = TensorrtExecutionProviderWithOptions(¶ms); -EXPECT_TRUE(session_object.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); -auto status = session_object.Load(model_name); -ASSERT_TRUE(status.IsOK()); -status = session_object.Initialize(); -ASSERT_TRUE(status.IsOK()); - -// run inference -// timing cache should be created under the situation of non-dynamic/dynamic shape input and engine cache enabled/disabled -status = session_object.Run(run_options, feeds, output_names, &fetches); -ASSERT_TRUE(status.IsOK()); -VerifyOutputs(fetches, expected_dims_mul_m, expected_values_mul_m); -ASSERT_TRUE(IsTensorRTCacheExisted("./", ".timing")); -RemoveTensorRTCache("./", ".timing"); - -// run inference -// timing cache shoud not be used or created since input shape is not changed and engine won't be re-built -status = session_object.Run(run_options, feeds, output_names, &fetches); -ASSERT_TRUE(status.IsOK()); -VerifyOutputs(fetches, expected_dims_mul_m, expected_values_mul_m); -ASSERT_TRUE(!IsTensorRTCacheExisted("./", ".timing")); - -// create another ort session to test -InferenceSession session_object_2{so, GetEnvironment()}; -execution_provider = TensorrtExecutionProviderWithOptions(¶ms); -EXPECT_TRUE(session_object_2.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); -status = session_object_2.Load(model_name); -ASSERT_TRUE(status.IsOK()); -status = session_object_2.Initialize(); -ASSERT_TRUE(status.IsOK()); - -if (engine_info.compare("enginecache_enable") == 0) { -// engine cache is enabled - -// run inference -// timing cache shoud not be created since engine cache is existed and will be used -status = session_object_2.Run(run_options, feeds, output_names, &fetches); -ASSERT_TRUE(status.IsOK()); -VerifyOutputs(fetches, expected_dims_mul_m, expected_values_mul_m); -ASSERT_TRUE(!IsTensorRTCacheExisted("./", ".timing")); -} else { -// engine cache is not enabled - -// run inference -// timing cache shoud be created -status = 
session_object_2.Run(run_options, feeds, output_names, &fetches); -ASSERT_TRUE(status.IsOK()); -VerifyOutputs(fetches, expected_dims_mul_m, expected_values_mul_m); -ASSERT_TRUE(IsTensorRTCacheExisted("./", ".timing")); -RemoveTensorRTCache("./", ".timing"); -} - -if (input_type.compare("dynamic") == 0) { -// dynamic input shape - -// inference run with input shape {1, 1, 6} -// timing cache will be created -// TRT engine and profile will be updated -dims_mul_x = {1, 1, 6}; -CreateMLValue(cpu_allocator, dims_mul_x, values_mul_x, &ml_value_x); -CreateMLValue(cpu_allocator, dims_mul_x, values_mul_x, &ml_value_y); -CreateMLValue(cpu_allocator, dims_mul_x, values_mul_x, &ml_value_z); -feeds.clear(); -feeds.insert(std::make_pair("X", ml_value_x)); -feeds.insert(std::make_pair("Y", ml_value_y)); -feeds.insert(std::make_pair("Z", ml_value_z)); - -status = session_object_2.Run(run_options, feeds, output_names, &fetches); -ASSERT_TRUE(status.IsOK()); -VerifyOutputs(fetches, expected_dims_mul_m, expected_values_mul_m); -ASSERT_TRUE(IsTensorRTCacheExisted("./", ".timing")); -} - -// clean up caches for another session -RemoveTensorRTCache("./", ".timing"); -RemoveTensorRTCache("./", ".profile"); -RemoveTensorRTCache("./", ".engine"); - -} else if (cache_type.compare("engine") == 0) { -// #TODO -} else if (cache_type.compare("profile") == 0) { -// #TODO -} -} - -// The TensorrtExecutionProviderCacheTest aims to test the functionality of all the engine/profile/timing caches of ORT TRT. -// It uses value-parameterized test and the parameter in the test is a composite parameter which has following format: -// ##cache type##_##input shape type##_##additional provider options if needed## -// - cache type (could be engine cache, profile cache or timing cache) -// - input shape type (could be dynamic input shape or static input shape). 
-// -// -// We have following test parameters: -// - timing_dynamic_enginecache_enable: timing cache enabled, dynamic input shape and engine cache enable -// - timing_dynamic_enginecache_disable: timing cache enabled, dynamic input shape and engine cache disable -// - timing_static_enginecache_enable: timing cache enabled, static input shape and engine cache enable -// - timing_static_enginecache_disable: timing cache enabled, static input shape and engine cache disable -INSTANTIATE_TEST_SUITE_P(TensorrtExecutionProviderCacheTests, TensorrtExecutionProviderCacheTest, testing::Values("timing_dynamic_enginecache_enable", - "timing_dynamic_enginecache_disable", - "timing_static_enginecache_enable", - "timing_static_enginecache_disable"), -[](const ::testing::TestParamInfo& info) {return info.param;}); - - TEST(TensorrtExecutionProviderTest, SessionCreationWithMultiThreadsAndInferenceWithMultiThreads) { std::vector threads; std::string model_name = "trt_execution_provider_multithreading_test.onnx"; @@ -680,6 +429,7 @@ TEST_P(TensorrtExecutionProviderCacheTest, Run) { nullptr, 0, 0, + 0, 0}; if (cache_type.compare("engine") == 0) { @@ -792,9 +542,68 @@ TEST_P(TensorrtExecutionProviderCacheTest, Run) { } } else if (cache_type.compare("timing") == 0) { // add test code here + + /* Following code block tests the functionality of engine and optimization profile of ORT TRT, including: + * - timing cache cache serialization/de-serialization + * - benefir of usign a timing cache no matter if dynamic / static input + */ + uint64_t compilation_without_cache_ms, compilation_with_cache_ms; + + params.trt_timing_cache_enable = 1; + // std::chrono + { + auto start = chrono::steady_clock::now(); + std::unique_ptr execution_provider = TensorrtExecutionProviderWithOptions(¶ms); + EXPECT_TRUE(session_object.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); + auto status = session_object.Load(model_name); + ASSERT_TRUE(status.IsOK()); + status = session_object.Initialize(); + ASSERT_TRUE(status.IsOK()); + + // run inference + // TRT timing cache should be created under the situation of non-dynamic/dynamic shape input + status = session_object.Run(run_options, feeds, output_names, &fetches); + auto end = chrono::steady_clock::now(); + ASSERT_TRUE(status.IsOK()); + VerifyOutputs(fetches, expected_dims_mul_m, expected_values_mul_m); + ASSERT_TRUE(IsCacheExistedByType("./", ".timing")); + compilation_without_cache_ms = chrono::duration_cast(end - start).count(); + } + + // get new session and reinitialize model + // second same inference should resuse the cache and therefore have a faster build + if (input_type.compare("static") == 0) { + { + InferenceSession session_object_new{so, GetEnvironment()}; + { + auto start = chrono::steady_clock::now(); + std::unique_ptr execution_provider = TensorrtExecutionProviderWithOptions(¶ms); + EXPECT_TRUE(session_object_new.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); + auto status = session_object_new.Load(model_name); + ASSERT_TRUE(status.IsOK()); + status = session_object_new.Initialize(); + ASSERT_TRUE(status.IsOK()); + + // run inference + // TRT timing cache should be created under the situation of non-dynamic/dynamic shape input + status = session_object_new.Run(run_options, feeds, output_names, &fetches); + // TODO narrow down actual compilation section + auto end = chrono::steady_clock::now(); + + ASSERT_TRUE(status.IsOK()); + VerifyOutputs(fetches, expected_dims_mul_m, expected_values_mul_m); + ASSERT_TRUE(IsCacheExistedByType("./", 
".timing")); + compilation_with_cache_ms = chrono::duration_cast(end - start).count(); + } + } + ASSERT_TRUE(compilation_with_cache_ms <= compilation_without_cache_ms); + } else { + // TODO test dynamic shapes + } } // clean up caches + RemoveCachesByType("./", ".timing"); RemoveCachesByType("./", ".engine"); RemoveCachesByType("./", ".profile"); } @@ -809,11 +618,13 @@ TEST_P(TensorrtExecutionProviderCacheTest, Run) { * We have following test parameters: * - engine_static: engine cache enabled with non-dynamic input shape * - engine_dynamic: engine cache enabled with dynamic input shape - * - timing_static: will be added - * - timing_dynamic: will be added + * - timing_static: timing cache enabled, static input shape + * - timing_dynamic: timing cache enabled, static input shape */ INSTANTIATE_TEST_SUITE_P(TensorrtExecutionProviderCacheTests, TensorrtExecutionProviderCacheTest, testing::Values("engine_static", - "engine_dynamic"), + "engine_dynamic", + "timing_static", + "timing_dynamic"), [](const ::testing::TestParamInfo& info) {return info.param;}); TEST(TensorrtExecutionProviderTest, FunctionTest) { From bf0b880081c45224e0bd7eb20c3cd20179e8d55c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20M=C3=BCller?= Date: Wed, 22 Feb 2023 11:50:00 +0100 Subject: [PATCH 25/30] append compute capability to cache and add force option --- .../tensorrt/tensorrt_provider_options.h | 3 +- .../core/session/onnxruntime_c_api.h | 4 ++- .../tensorrt/tensorrt_execution_provider.cc | 29 ++++++++++++++++--- .../tensorrt/tensorrt_execution_provider.h | 2 ++ .../tensorrt_execution_provider_info.cc | 10 ++++--- .../tensorrt_execution_provider_info.h | 5 ++-- .../tensorrt_execution_provider_utils.h | 13 ++++++++- .../tensorrt/tensorrt_provider_factory.cc | 2 ++ .../core/session/provider_bridge_ort.cc | 2 ++ onnxruntime/test/perftest/ort_test_session.cc | 2 ++ onnxruntime/test/providers/cpu/model_tests.cc | 2 +- .../providers/tensorrt/tensorrt_basic_test.cc | 3 ++ onnxruntime/test/util/default_providers.cc | 2 ++ 13 files changed, 65 insertions(+), 14 deletions(-) diff --git a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h index 89b47daaeecf8..d3d35b9cdd3fa 100644 --- a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h +++ b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h @@ -5,7 +5,7 @@ /// /// Options for the TensorRT provider that are passed to SessionOptionsAppendExecutionProvider_TensorRT_V2. -/// Please note that this struct is *similar* to OrtTensorRTProviderOptions but only to be used internally. +/// Please note that this struct is *similar* to OrtTensorRTProviderOptions but only to be used internally. /// Going forward, new trt provider options are to be supported via this struct and usage of the publicly defined /// OrtTensorRTProviderOptions will be deprecated over time. /// User can only get the instance of OrtTensorRTProviderOptionsV2 via CreateTensorRTProviderOptions. @@ -32,4 +32,5 @@ struct OrtTensorRTProviderOptionsV2 { int trt_context_memory_sharing_enable; // enable context memory sharing between subgraphs. Default 0 = false, nonzero = true int trt_layer_norm_fp32_fallback; // force Pow + Reduce ops in layer norm to FP32. Default 0 = false, nonzero = true int trt_timing_cache_enable; // enable TensorRT timing cache. 
Default 0 = false, nonzero = true + int trt_force_timing_cache; // force the TensorRT cache to be used even if device profile does not match. Default 0 = false, nonzero = true }; diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index 44c41e5d1f587..54f849d68fbac 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -528,6 +528,8 @@ typedef struct OrtTensorRTProviderOptions { int trt_dla_core; // DLA core number. Default 0 int trt_dump_subgraphs; // dump TRT subgraph. Default 0 = false, nonzero = true int trt_engine_cache_enable; // enable engine caching. Default 0 = false, nonzero = true + int trt_timing_cache_enable; // enable TensorRT timing cache. Default 0 = false, nonzero = true + int trt_force_timing_cache; // force the TensorRT cache to be used even if device profile does not match. Default 0 = false, nonzero = true const char* trt_engine_cache_path; // specify engine cache path int trt_engine_decryption_enable; // enable engine decryption. Default 0 = false, nonzero = true const char* trt_engine_decryption_lib_path; // specify engine decryption library path @@ -3632,7 +3634,7 @@ struct OrtApi { * 2. For windows, ort will infer the group id from a logical processor id, for example, assuming there are two groups with each has 64 logical processors, * an id of 64 will be inferred as the last processor of the 1st group, while 65 will be interpreted as the 1st processor of the second group. * Hence 64-65 is an invalid configuration, because a windows thread cannot be attached to processors across group boundary. - * + * * \since Version 1.14 */ ORT_API2_STATUS(SetGlobalIntraOpThreadAffinity, _Inout_ OrtThreadingOptions* tp_options, const char* affinity_string); diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index 82384d208bcc6..3078a7b608e80 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -422,6 +422,11 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv timing_cache_enable_ = (std::stoi(timing_cache_enable_env) == 0 ? false : true); } + const std::string timing_force_match_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kForceTimingCache); + if (!timing_force_match_env.empty()) { + force_timing_cache_match_ = (std::stoi(timing_force_match_env) == 0 ? 
false : true); + } + if (engine_cache_enable_ || int8_enable_ || timing_cache_enable_) { const std::string engine_cache_path = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kEngineCachePath); cache_path_ = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kCachePath); @@ -1404,7 +1409,9 @@ common::Status TensorrtExecutionProvider::Compile(const std::vectorsetTimingCache(*timing_cache, false); + trt_config->setTimingCache(*timing_cache, force_timing_cache_match_); + LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Deserialized timing cache from " + timing_cache_path; } // Build engine + auto engine_build_start = std::chrono::steady_clock::now(); trt_engine = tensorrt_ptr::unique_pointer(trt_builder->buildEngineWithConfig(*trt_network, *trt_config)); if (trt_engine == nullptr) { return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "TensorRT EP could not build engine for fused node: " + fused_node.Name()); } + auto engine_build_stop = std::chrono::steady_clock::now(); + LOGS_DEFAULT(INFO) << "TensorRT engine build for " << trt_node_name_with_precision << " took: " << + std::chrono::duration_cast(engine_build_stop - engine_build_start).count() << "ms" << std::endl; + if (engine_cache_enable_) { nvinfer1::IHostMemory* serializedModel = trt_engine->serialize(); size_t engine_size = serializedModel->size(); @@ -1602,7 +1615,9 @@ common::Status TensorrtExecutionProvider::Compile(const std::vectorengine_cache_path, trt_state->trt_node_name_with_precision); const std::string engine_cache_path = cache_path + ".engine"; const std::string profile_cache_path = cache_path + ".profile"; - const std::string timing_cache_path = cache_path + ".timing"; + cudaDeviceProp prop; + CUDA_CALL_THROW(cudaGetDeviceProperties(&prop, 0)); + const std::string timing_cache_path = GetTimingCachePath(trt_state->engine_cache_path, prop); if (trt_state->engine_cache_enable && trt_engine == nullptr) { std::ifstream engine_file(engine_cache_path, std::ios::binary | std::ios::in); std::ifstream profile_file(profile_cache_path, std::ios::binary | std::ios::in); @@ -1846,14 +1861,20 @@ common::Status TensorrtExecutionProvider::Compile(const std::vectorsetTimingCache(*timing_cache, false); + trt_config->setTimingCache(*timing_cache, force_timing_cache_match_); + LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Deserialized timing cache from " + timing_cache_path; } // Build engine { auto lock = GetApiLock(); + auto engine_build_start = std::chrono::steady_clock::now(); *(trt_state->engine) = tensorrt_ptr::unique_pointer( trt_builder->buildEngineWithConfig(*trt_state->network->get(), *trt_config)); + auto engine_build_stop = std::chrono::steady_clock::now(); + LOGS_DEFAULT(INFO) << "TensorRT engine build for " << trt_state->trt_node_name_with_precision << " took: " << + std::chrono::duration_cast(engine_build_stop - engine_build_start).count() << "ms" << std::endl; + } if (trt_state->engine == nullptr) { return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "TensorRT EP Failed to Build Engine."); diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h index 5570061c23f0b..4c16de27bd94a 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h @@ -31,6 +31,7 @@ static const std::string kForceSequentialEngineBuild= "ORT_TENSORRT_FORCE_SEQUEN static const std::string kContextMemorySharingEnable= "ORT_TENSORRT_CONTEXT_MEMORY_SHARING_ENABLE"; static const std::string 
kLayerNormFP32Fallback= "ORT_TENSORRT_LAYER_NORM_FP32_FALLBACK"; static const std::string kTimingCacheEnable = "ORT_TENSORRT_TIMING_CACHE_ENABLE"; +static const std::string kForceTimingCache = "ORT_TENSORRT_FORCE_TIMING_CACHE_ENABLE"; // Old env variable for backward compatibility static const std::string kEngineCachePath = "ORT_TENSORRT_ENGINE_CACHE_PATH"; } // namespace tensorrt_env_vars @@ -179,6 +180,7 @@ class TensorrtExecutionProvider : public IExecutionProvider { int (*engine_decryption_)(const char*, char*, size_t*); int (*engine_encryption_)(const char*, char*, size_t); bool timing_cache_enable_ = false; + bool force_timing_cache_match_ = false; std::unordered_set control_flow_op_set_ = {"If", "Loop", "Scan"}; std::unordered_map> parsers_; diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc index 9bc1997e44371..c79ddf5bcc985 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc @@ -27,12 +27,13 @@ constexpr const char* kCachePath = "trt_engine_cache_path"; constexpr const char* kDecryptionEnable = "trt_engine_decryption_enable"; constexpr const char* kDecryptionLibPath = "trt_engine_decryption_lib_path"; constexpr const char* kForceSequentialEngineBuild = "trt_force_sequential_engine_build"; -// add new provider option name here. +// add new provider option name here. constexpr const char* kContextMemorySharingEnable = "trt_context_memory_sharing_enable"; constexpr const char* kLayerNormFP32Fallback = "trt_layer_norm_fp32_fallback"; constexpr const char* kTimingCacheEnable = "trt_timing_cache_enable"; +constexpr const char* kForceTimingCacheMatch = "trt_force_timing_cache_match"; } // namespace provider_option_names -} // namespace tensorrt +} // namespace tensorrt TensorrtExecutionProviderInfo TensorrtExecutionProviderInfo::FromProviderOptions(const ProviderOptions& options) { TensorrtExecutionProviderInfo info{}; @@ -58,16 +59,17 @@ TensorrtExecutionProviderInfo TensorrtExecutionProviderInfo::FromProviderOptions .AddAssignmentToReference(tensorrt::provider_option_names::kInt8CalibTable, info.int8_calibration_table_name) .AddAssignmentToReference(tensorrt::provider_option_names::kInt8UseNativeCalibTable, info.int8_use_native_calibration_table) .AddAssignmentToReference(tensorrt::provider_option_names::kDLAEnable, info.dla_enable) - .AddAssignmentToReference(tensorrt::provider_option_names::kDLACore, info.dla_core) + .AddAssignmentToReference(tensorrt::provider_option_names::kDLACore, info.dla_core) .AddAssignmentToReference(tensorrt::provider_option_names::kDumpSubgraphs, info.dump_subgraphs) .AddAssignmentToReference(tensorrt::provider_option_names::kEngineCacheEnable, info.engine_cache_enable) .AddAssignmentToReference(tensorrt::provider_option_names::kCachePath, info.engine_cache_path) .AddAssignmentToReference(tensorrt::provider_option_names::kDecryptionEnable, info.engine_decryption_enable) - .AddAssignmentToReference(tensorrt::provider_option_names::kDecryptionLibPath, info.engine_decryption_lib_path) + .AddAssignmentToReference(tensorrt::provider_option_names::kDecryptionLibPath, info.engine_decryption_lib_path) .AddAssignmentToReference(tensorrt::provider_option_names::kForceSequentialEngineBuild, info.force_sequential_engine_build) .AddAssignmentToReference(tensorrt::provider_option_names::kContextMemorySharingEnable, info.context_memory_sharing_enable) 
.AddAssignmentToReference(tensorrt::provider_option_names::kLayerNormFP32Fallback, info.layer_norm_fp32_fallback) .AddAssignmentToReference(tensorrt::provider_option_names::kTimingCacheEnable, info.timing_cache_enable) + .AddAssignmentToReference(tensorrt::provider_option_names::kForceTimingCacheMatch, info.force_timing_cache) .Parse(options)); // add new provider option here. return info; diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h index 2d4b8bd6df81f..bacc94795d69e 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h @@ -17,10 +17,10 @@ struct TensorrtExecutionProviderInfo { void* user_compute_stream{nullptr}; bool has_trt_options{false}; int max_partition_iterations{1000}; - int min_subgraph_size{1}; + int min_subgraph_size{1}; size_t max_workspace_size{1 << 30}; bool fp16_enable{false}; - bool int8_enable{false}; + bool int8_enable{false}; std::string int8_calibration_table_name{""}; bool int8_use_native_calibration_table{false}; bool dla_enable{false}; @@ -34,6 +34,7 @@ struct TensorrtExecutionProviderInfo { bool context_memory_sharing_enable{false}; bool layer_norm_fp32_fallback{false}; bool timing_cache_enable{false}; + bool force_timing_cache{false}; static TensorrtExecutionProviderInfo FromProviderOptions(const ProviderOptions& options); static ProviderOptions ToProviderOptions(const TensorrtExecutionProviderInfo& info); diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_utils.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_utils.h index a04b971c6a7b9..d28e3d743df0e 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_utils.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_utils.h @@ -167,10 +167,21 @@ std::string GetCachePath(const std::string& root, const std::string& name) { } } +/* + * Get Timing by compute capability + * + */ +std::string GetTimingCachePath(const std::string& root, cudaDeviceProp prop) { + // append compute capability of the GPU as this invalidates the cache and TRT will throw when loading the cache + const std::string timing_cache_name = "TensorrtExecutionProvider_cache_cc" + + std::to_string(prop.major * 10 + prop.minor) + ".timing"; + return GetCachePath(root, timing_cache_name); +} + /* * Get cache by type * - * \param root root path of the cache + * \param root root path of the cache * \param file_extension It could be ".engine", ".profile" or ".timing" */ std::vector GetCachesByType(const std::string& root, std::string file_extension) { diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc index 8b7e3c55ea396..e86730a7e58dd 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc @@ -71,6 +71,7 @@ struct Tensorrt_Provider : Provider { info.context_memory_sharing_enable = options.trt_context_memory_sharing_enable != 0; info.layer_norm_fp32_fallback = options.trt_layer_norm_fp32_fallback != 0; info.timing_cache_enable = options.trt_timing_cache_enable != 0; + info.force_timing_cache = options.trt_force_timing_cache != 0; return std::make_shared(info); } @@ -139,6 +140,7 @@ struct Tensorrt_Provider : Provider { trt_options.trt_context_memory_sharing_enable = 
internal_options.context_memory_sharing_enable; trt_options.trt_layer_norm_fp32_fallback = internal_options.layer_norm_fp32_fallback; trt_options.trt_timing_cache_enable = internal_options.timing_cache_enable; + trt_options.trt_force_timing_cache = internal_options.force_timing_cache; } ProviderOptions GetProviderOptions(const void* provider_options) override { diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 9795b34f359b3..ba97f75b624aa 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1266,6 +1266,7 @@ OrtTensorRTProviderOptionsV2 OrtTensorRTProviderOptionsToOrtTensorRTProviderOpti // Add new provider option below // Use default value as this field is not available in OrtTensorRTProviderOptions trt_options_converted.trt_timing_cache_enable = 0; + trt_options_converted.trt_force_timing_cache = 0; trt_options_converted.trt_context_memory_sharing_enable = 0; trt_options_converted.trt_layer_norm_fp32_fallback = 0; return trt_options_converted; @@ -1575,6 +1576,7 @@ ORT_API_STATUS_IMPL(OrtApis::CreateTensorRTProviderOptions, _Outptr_ OrtTensorRT (*out)->trt_context_memory_sharing_enable = false; (*out)->trt_layer_norm_fp32_fallback = false; (*out)->trt_timing_cache_enable = false; + (*out)->trt_force_timing_cache = false; return nullptr; #else ORT_UNUSED_PARAMETER(out); diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index c624a83d3d925..22ba61412b8d0 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -76,6 +76,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device bool trt_context_memory_sharing_enable = false; bool trt_layer_norm_fp32_fallback = false; bool trt_timing_cache_enable = false; + bool trt_force_timing_cache = false; #ifdef _MSC_VER std::string ov_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string); @@ -257,6 +258,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device tensorrt_options.trt_context_memory_sharing_enable = trt_context_memory_sharing_enable; tensorrt_options.trt_layer_norm_fp32_fallback = trt_layer_norm_fp32_fallback; tensorrt_options.trt_timing_cache_enable = trt_timing_cache_enable; + tensorrt_options.trt_force_timing_cache = trt_force_timing_cache; session_options.AppendExecutionProvider_TensorRT_V2(tensorrt_options); OrtCUDAProviderOptions cuda_options; diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc index 5adfcda2a38ea..fe93e2fc75255 100644 --- a/onnxruntime/test/providers/cpu/model_tests.cc +++ b/onnxruntime/test/providers/cpu/model_tests.cc @@ -687,7 +687,7 @@ TEST_P(ModelTest, Run) { if (test_case_name.find(ORT_TSTR("FLOAT16")) != std::string::npos) { OrtTensorRTProviderOptionsV2 params{0, 0, nullptr, 1000, 1, 1 << 30, 1, // enable fp16 - 0, nullptr, 0, 0, 0, 0, 0, nullptr, 0, nullptr, 0, 0, 0, 0}; + 0, nullptr, 0, 0, 0, 0, 0, nullptr, 0, nullptr, 0, 0, 0, 0, 0}; ASSERT_ORT_STATUS_OK(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT_V2(ortso, ¶ms)); } else { OrtTensorRTProviderOptionsV2* ep_option; diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index 6c2b051db3541..31b538e4a6d02 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ 
b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ -154,6 +154,7 @@ void RunWithOneSessionSingleThreadInference(std::string model_name, std::string 0, 0, 0, + 0, 0}; params.trt_engine_cache_enable = 1; @@ -226,6 +227,7 @@ void RunWithOneSessionMultiThreadsInference(std::string model_name, std::string 0, 0, 0, + 0, 0}; params.trt_engine_cache_enable = 1; @@ -430,6 +432,7 @@ TEST_P(TensorrtExecutionProviderCacheTest, Run) { 0, 0, 0, + 0, 0}; if (cache_type.compare("engine") == 0) { diff --git a/onnxruntime/test/util/default_providers.cc b/onnxruntime/test/util/default_providers.cc index 044c037365d63..869b002279f8f 100644 --- a/onnxruntime/test/util/default_providers.cc +++ b/onnxruntime/test/util/default_providers.cc @@ -39,6 +39,8 @@ std::unique_ptr DefaultTensorrtExecutionProvider() { 0, 0, 0, + 0, + 0, nullptr, 0, nullptr, From 244b437ed11dfd9f0029d1e24a7580a6baadf00e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20M=C3=BCller?= Date: Wed, 22 Feb 2023 11:50:40 +0100 Subject: [PATCH 26/30] Take timing of first warm up inference --- onnxruntime/test/perftest/performance_runner.cc | 5 +++++ onnxruntime/test/perftest/performance_runner.h | 1 + 2 files changed, 6 insertions(+) diff --git a/onnxruntime/test/perftest/performance_runner.cc b/onnxruntime/test/perftest/performance_runner.cc index 2039c65b53aa6..b27ded96d85a9 100644 --- a/onnxruntime/test/perftest/performance_runner.cc +++ b/onnxruntime/test/perftest/performance_runner.cc @@ -114,7 +114,9 @@ Status PerformanceRunner::Run() { } // warm up + initial_inference_result_.start = std::chrono::high_resolution_clock::now(); ORT_RETURN_IF_ERROR(RunOneIteration()); + initial_inference_result_.end = std::chrono::high_resolution_clock::now(); // TODO: start profiling // if (!performance_test_config_.run_config.profile_file.empty()) @@ -139,9 +141,12 @@ Status PerformanceRunner::Run() { std::chrono::duration session_create_duration = session_create_end_ - session_create_start_; // TODO: end profiling // if (!performance_test_config_.run_config.profile_file.empty()) session_object->EndProfiling(); + auto first_inference_duration = + std::chrono::duration_cast(initial_inference_result_.end - initial_inference_result_.start).count(); std::chrono::duration inference_duration = performance_result_.end - performance_result_.start; std::cout << "Session creation time cost: " << session_create_duration.count() << " s\n" + << "First inference time cost: " << first_inference_duration << " ms\n" << "Total inference time cost: " << performance_result_.total_time_cost << " s\n" // sum of time taken by each request << "Total inference requests: " << performance_result_.time_costs.size() << "\n" << "Average inference time cost: " << performance_result_.total_time_cost / performance_result_.time_costs.size() * 1000 << " ms\n" diff --git a/onnxruntime/test/perftest/performance_runner.h b/onnxruntime/test/perftest/performance_runner.h index aae68fd2d289f..da2df9c39f44c 100644 --- a/onnxruntime/test/perftest/performance_runner.h +++ b/onnxruntime/test/perftest/performance_runner.h @@ -106,6 +106,7 @@ class PerformanceRunner { private: std::chrono::time_point session_create_start_; std::chrono::time_point session_create_end_; + PerformanceResult initial_inference_result_; PerformanceResult performance_result_; PerformanceTestConfig performance_test_config_; std::unique_ptr test_model_info_; From 5db55ff16132c1e2c3b2d4dc353ff8e7f0ea761d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20M=C3=BCller?= Date: Mon, 27 Feb 2023 15:17:39 +0100 
Subject: [PATCH 27/30] detailed build log option --- .../tensorrt/tensorrt_provider_options.h | 3 +- .../core/session/onnxruntime_c_api.h | 1 + .../tensorrt/tensorrt_execution_provider.cc | 79 ++++++++++++------- .../tensorrt/tensorrt_execution_provider.h | 4 + .../tensorrt_execution_provider_info.cc | 3 +- .../tensorrt_execution_provider_info.h | 1 + .../tensorrt/tensorrt_provider_factory.cc | 2 +- .../core/session/provider_bridge_ort.cc | 2 + onnxruntime/test/perftest/ort_test_session.cc | 10 +++ onnxruntime/test/providers/cpu/model_tests.cc | 2 +- .../providers/tensorrt/tensorrt_basic_test.cc | 3 + onnxruntime/test/util/default_providers.cc | 1 + 12 files changed, 77 insertions(+), 34 deletions(-) diff --git a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h index b2c650179fbb0..ddf390db3d1c3 100644 --- a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h +++ b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h @@ -5,7 +5,7 @@ /// /// Options for the TensorRT provider that are passed to SessionOptionsAppendExecutionProvider_TensorRT_V2. -/// Please note that this struct is *similar* to OrtTensorRTProviderOptions but only to be used internally. +/// Please note that this struct is *similar* to OrtTensorRTProviderOptions but only to be used internally. /// Going forward, new trt provider options are to be supported via this struct and usage of the publicly defined /// OrtTensorRTProviderOptions will be deprecated over time. /// User can only get the instance of OrtTensorRTProviderOptionsV2 via CreateTensorRTProviderOptions. @@ -33,4 +33,5 @@ struct OrtTensorRTProviderOptionsV2 { int trt_layer_norm_fp32_fallback; // force Pow + Reduce ops in layer norm to FP32. Default 0 = false, nonzero = true int trt_timing_cache_enable; // enable TensorRT timing cache. Default 0 = false, nonzero = true int trt_force_timing_cache; // force the TensorRT cache to be used even if device profile does not match. Default 0 = false, nonzero = true + int trt_detailed_build_log; // Enable detailed build step logging on TensorRT EP with timing for each engine build. Default 0 = false, nonzero = true }; diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index 3881154f8d27e..66e2e64f8c434 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -532,6 +532,7 @@ typedef struct OrtTensorRTProviderOptions { int trt_engine_cache_enable; // enable engine caching. Default 0 = false, nonzero = true int trt_timing_cache_enable; // enable TensorRT timing cache. Default 0 = false, nonzero = true int trt_force_timing_cache; // force the TensorRT cache to be used even if device profile does not match. Default 0 = false, nonzero = true + int trt_detailed_build_log; // Enable detailed build step logging on TensorRT EP with timing for each engine build. Default 0 = false, nonzero = true const char* trt_engine_cache_path; // specify engine cache path int trt_engine_decryption_enable; // enable engine decryption. 
Default 0 = false, nonzero = true const char* trt_engine_decryption_lib_path; // specify engine decryption library path diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index 64d88e3c1596b..e962e5ef52047 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -343,6 +343,7 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv dump_subgraphs_ = info.dump_subgraphs; engine_cache_enable_ = info.engine_cache_enable; timing_cache_enable_ = info.timing_cache_enable; + detailed_build_log_ = info.detailed_build_log; if (engine_cache_enable_ || int8_enable_ || timing_cache_enable_) { cache_path_ = info.engine_cache_path; } @@ -422,6 +423,11 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv timing_cache_enable_ = (std::stoi(timing_cache_enable_env) == 0 ? false : true); } + const std::string detailed_build_log_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kDetailedBuildLog); + if (!detailed_build_log_env.empty()) { + detailed_build_log_ = (std::stoi(detailed_build_log_env) == 0 ? false : true); + } + const std::string timing_force_match_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kForceTimingCache); if (!timing_force_match_env.empty()) { force_timing_cache_match_ = (std::stoi(timing_force_match_env) == 0 ? false : true); @@ -1414,9 +1420,12 @@ common::Status TensorrtExecutionProvider::Compile(const std::vectorsetTimingCache(*timing_cache, force_timing_cache_match_); - LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Deserialized timing cache from " + timing_cache_path; + if (detailed_build_log_) { + LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Deserialized timing cache from " + timing_cache_path; + } } // Build engine - auto engine_build_start = std::chrono::steady_clock::now(); + std::chrono::steady_clock::time_point engine_build_start; + if (detailed_build_log_) { + engine_build_start = std::chrono::steady_clock::now(); + } trt_engine = std::unique_ptr(trt_builder->buildEngineWithConfig(*trt_network, *trt_config)); if (trt_engine == nullptr) { return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "TensorRT EP could not build engine for fused node: " + fused_node.Name()); } - auto engine_build_stop = std::chrono::steady_clock::now(); - LOGS_DEFAULT(INFO) << "TensorRT engine build for " << trt_node_name_with_precision << " took: " << - std::chrono::duration_cast(engine_build_stop - engine_build_start).count() << "ms" << std::endl; - + if (detailed_build_log_) { + auto engine_build_stop = std::chrono::steady_clock::now(); + LOGS_DEFAULT(INFO) << "TensorRT engine build for " << trt_node_name_with_precision << " took: " << + std::chrono::duration_cast(engine_build_stop - engine_build_start).count() << "ms" << std::endl; + } if (engine_cache_enable_) { std::unique_ptr serializedModel(trt_engine->serialize()); size_t engine_size = serializedModel->size(); @@ -1512,7 +1527,9 @@ common::Status TensorrtExecutionProvider::Compile(const std::vectornode_name], &tensorrt_mu_, fp16_enable_, int8_enable_, int8_calibration_cache_available_, dla_enable_, dla_core_, &max_workspace_size_, trt_node_name_with_precision, engine_cache_enable_, cache_path_, runtime_.get(), nullptr, allocator_, context_memory_sharing_enable_, &max_ctx_mem_size_, &context_memory_, - dynamic_range_map, engine_decryption_enable_, engine_decryption_, engine_encryption_, 
timing_cache_enable_}; + dynamic_range_map, engine_decryption_enable_, engine_decryption_, engine_encryption_, timing_cache_enable_, + force_timing_cache_match_, detailed_build_log_}; *state = p.release(); return 0; }; @@ -1619,9 +1637,12 @@ common::Status TensorrtExecutionProvider::Compile(const std::vectorengine_cache_path, trt_state->trt_node_name_with_precision); const std::string engine_cache_path = cache_path + ".engine"; const std::string profile_cache_path = cache_path + ".profile"; - cudaDeviceProp prop; - CUDA_CALL_THROW(cudaGetDeviceProperties(&prop, 0)); - const std::string timing_cache_path = GetTimingCachePath(trt_state->engine_cache_path, prop); + std::string timing_cache_path = ""; + if (timing_cache_enable_) { + cudaDeviceProp prop; + CUDA_CALL_THROW(cudaGetDeviceProperties(&prop, device_id_)); + timing_cache_path = GetTimingCachePath(cache_path_, prop); + } if (trt_state->engine_cache_enable && trt_engine == nullptr) { std::ifstream engine_file(engine_cache_path, std::ios::binary | std::ios::in); std::ifstream profile_file(profile_cache_path, std::ios::binary | std::ios::in); @@ -1866,19 +1887,25 @@ common::Status TensorrtExecutionProvider::Compile(const std::vectorsetTimingCache(*timing_cache, force_timing_cache_match_); - LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Deserialized timing cache from " + timing_cache_path; + if (detailed_build_log_) { + LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Deserialized timing cache from " + timing_cache_path; + } } // Build engine { auto lock = GetApiLock(); - auto engine_build_start = std::chrono::steady_clock::now(); + std::chrono::steady_clock::time_point engine_build_start; + if (detailed_build_log_) { + engine_build_start = std::chrono::steady_clock::now(); + } *(trt_state->engine) = std::unique_ptr( trt_builder->buildEngineWithConfig(*trt_state->network->get(), *trt_config)); - auto engine_build_stop = std::chrono::steady_clock::now(); + if (detailed_build_log_) { + auto engine_build_stop = std::chrono::steady_clock::now(); LOGS_DEFAULT(INFO) << "TensorRT engine build for " << trt_state->trt_node_name_with_precision << " took: " << - std::chrono::duration_cast(engine_build_stop - engine_build_start).count() << "ms" << std::endl; - + std::chrono::duration_cast(engine_build_stop - engine_build_start).count() << "ms" << std::endl; + } } if (trt_state->engine == nullptr) { return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "TensorRT EP Failed to Build Engine."); @@ -1914,20 +1941,12 @@ common::Status TensorrtExecutionProvider::Compile(const std::vectortiming_cache_enable) - { - auto timing_cache = trt_config->getTimingCache(); - std::unique_ptr timingCacheHostData{timing_cache->serialize()}; - if (timingCacheHostData == nullptr) { - return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, - "TensorRT EP could not serialize timing cache: " + timing_cache_path); + if (detailed_build_log_) { + LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Serialized timing cache " + timing_cache_path; } - saveTimingCacheFile(timing_cache_path, timingCacheHostData.get()); } + // Build context if (trt_state->context_memory_sharing_enable) { *(trt_state->context) = std::unique_ptr( diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h index b294a7a51d26d..cb87b31e01b96 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h @@ -32,6 +32,7 @@ static const std::string kContextMemorySharingEnable = 
"ORT_TENSORRT_CONTEXT_MEM static const std::string kLayerNormFP32Fallback = "ORT_TENSORRT_LAYER_NORM_FP32_FALLBACK"; static const std::string kTimingCacheEnable = "ORT_TENSORRT_TIMING_CACHE_ENABLE"; static const std::string kForceTimingCache = "ORT_TENSORRT_FORCE_TIMING_CACHE_ENABLE"; +static const std::string kDetailedBuildLog = "ORT_TENSORRT_DETAILED_BUILD_LOG_ENABLE"; // Old env variable for backward compatibility static const std::string kEngineCachePath = "ORT_TENSORRT_ENGINE_CACHE_PATH"; } // namespace tensorrt_env_vars @@ -117,6 +118,8 @@ struct TensorrtFuncState { int (*engine_decryption)(const char*, char*, size_t*) = nullptr; int (*engine_encryption)(const char*, char*, size_t) = nullptr; bool timing_cache_enable = true; + bool force_timing_cache = false; + bool detailed_build_log = false; }; // Logical device representation. @@ -181,6 +184,7 @@ class TensorrtExecutionProvider : public IExecutionProvider { int (*engine_encryption_)(const char*, char*, size_t) = nullptr; bool timing_cache_enable_ = false; bool force_timing_cache_match_ = false; + bool detailed_build_log_ = false; std::unordered_set control_flow_op_set_ = {"If", "Loop", "Scan"}; std::unordered_map> parsers_; diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc index 5389edd4af5c8..ae06e6ce1338d 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc @@ -32,6 +32,7 @@ constexpr const char* kContextMemorySharingEnable = "trt_context_memory_sharing_ constexpr const char* kLayerNormFP32Fallback = "trt_layer_norm_fp32_fallback"; constexpr const char* kTimingCacheEnable = "trt_timing_cache_enable"; constexpr const char* kForceTimingCacheMatch = "trt_force_timing_cache_match"; +constexpr const char* kDetailedBuildLog = "trt_detailed_build_log"; } // namespace provider_option_names } // namespace tensorrt @@ -64,7 +65,7 @@ TensorrtExecutionProviderInfo TensorrtExecutionProviderInfo::FromProviderOptions .AddAssignmentToReference(tensorrt::provider_option_names::kEngineCacheEnable, info.engine_cache_enable) .AddAssignmentToReference(tensorrt::provider_option_names::kCachePath, info.engine_cache_path) .AddAssignmentToReference(tensorrt::provider_option_names::kDecryptionEnable, info.engine_decryption_enable) - .AddAssignmentToReference(tensorrt::provider_option_names::kDecryptionLibPath, info.engine_decryption_lib_path) + .AddAssignmentToReference(tensorrt::provider_option_names::kDecryptionLibPath, info.engine_decryption_lib_path) .AddAssignmentToReference(tensorrt::provider_option_names::kForceSequentialEngineBuild, info.force_sequential_engine_build) .AddAssignmentToReference(tensorrt::provider_option_names::kContextMemorySharingEnable, info.context_memory_sharing_enable) .AddAssignmentToReference(tensorrt::provider_option_names::kLayerNormFP32Fallback, info.layer_norm_fp32_fallback) diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h index bacc94795d69e..1a2e5e01af464 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h @@ -35,6 +35,7 @@ struct TensorrtExecutionProviderInfo { bool layer_norm_fp32_fallback{false}; bool timing_cache_enable{false}; bool force_timing_cache{false}; + bool 
detailed_build_log{false}; static TensorrtExecutionProviderInfo FromProviderOptions(const ProviderOptions& options); static ProviderOptions ToProviderOptions(const TensorrtExecutionProviderInfo& info); diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc index e86730a7e58dd..9b4b8236e0b23 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc @@ -71,7 +71,7 @@ struct Tensorrt_Provider : Provider { info.context_memory_sharing_enable = options.trt_context_memory_sharing_enable != 0; info.layer_norm_fp32_fallback = options.trt_layer_norm_fp32_fallback != 0; info.timing_cache_enable = options.trt_timing_cache_enable != 0; - info.force_timing_cache = options.trt_force_timing_cache != 0; + info.detailed_build_log = options.trt_detailed_build_log != 0; return std::make_shared(info); } diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index ef7f20d1b7d96..8e70dd24ac10a 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1277,6 +1277,7 @@ OrtTensorRTProviderOptionsV2 OrtTensorRTProviderOptionsToOrtTensorRTProviderOpti // Use default value as this field is not available in OrtTensorRTProviderOptions trt_options_converted.trt_timing_cache_enable = 0; trt_options_converted.trt_force_timing_cache = 0; + trt_options_converted.trt_detailed_build_log = 0; trt_options_converted.trt_context_memory_sharing_enable = 0; trt_options_converted.trt_layer_norm_fp32_fallback = 0; return trt_options_converted; @@ -1605,6 +1606,7 @@ ORT_API_STATUS_IMPL(OrtApis::CreateTensorRTProviderOptions, _Outptr_ OrtTensorRT (*out)->trt_layer_norm_fp32_fallback = false; (*out)->trt_timing_cache_enable = false; (*out)->trt_force_timing_cache = false; + (*out)->trt_detailed_build_log = false; return nullptr; #else ORT_UNUSED_PARAMETER(out); diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index 4ff66c6a2067b..552274b77bbfb 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -123,6 +123,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device bool trt_layer_norm_fp32_fallback = false; bool trt_timing_cache_enable = false; bool trt_force_timing_cache = false; + bool trt_detailed_build_log = false; #ifdef _MSC_VER std::string ov_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string); @@ -286,6 +287,14 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device } else { ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_force_timing_cache' should be a boolean i.e. true or false. Default value is false.\n"); } + } else if (key == "trt_detailed_build_log") { + if (value == "true" || value == "True") { + trt_detailed_build_log = true; + } else if (value == "false" || value == "False") { + trt_detailed_build_log = false; + } else { + ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_detailed_build_log' should be a boolean i.e. true or false. Default value is false.\n"); + } } else { ORT_THROW("[ERROR] [TensorRT] wrong key type entered. Choose from the following runtime key options that are available for TensorRT. 
['device_id', 'trt_max_partition_iterations', 'trt_min_subgraph_size', 'trt_max_workspace_size', 'trt_fp16_enable', 'trt_int8_enable', 'trt_int8_calibration_table_name', 'trt_int8_use_native_calibration_table', 'trt_dla_enable', 'trt_dla_core', 'trt_dump_subgraphs', 'trt_engine_cache_enable', 'trt_engine_cache_path', 'trt_engine_decryption_enable', 'trt_engine_decryption_lib_path', 'trt_force_sequential_engine_build', 'trt_context_memory_sharing_enable', 'trt_layer_norm_fp32_fallback'] \n"); } @@ -313,6 +322,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device tensorrt_options.trt_layer_norm_fp32_fallback = trt_layer_norm_fp32_fallback; tensorrt_options.trt_timing_cache_enable = trt_timing_cache_enable; tensorrt_options.trt_force_timing_cache = trt_force_timing_cache; + tensorrt_options.trt_detailed_build_log = trt_detailed_build_log; session_options.AppendExecutionProvider_TensorRT_V2(tensorrt_options); OrtCUDAProviderOptions cuda_options; diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc index 768abbc316bac..1da491fe4f9b0 100644 --- a/onnxruntime/test/providers/cpu/model_tests.cc +++ b/onnxruntime/test/providers/cpu/model_tests.cc @@ -683,7 +683,7 @@ TEST_P(ModelTest, Run) { if (test_case_name.find(ORT_TSTR("FLOAT16")) != std::string::npos) { OrtTensorRTProviderOptionsV2 params{0, 0, nullptr, 1000, 1, 1 << 30, 1, // enable fp16 - 0, nullptr, 0, 0, 0, 0, 0, nullptr, 0, nullptr, 0, 0, 0, 0, 0}; + 0, nullptr, 0, 0, 0, 0, 0, nullptr, 0, nullptr, 0, 0, 0, 0, 0, 0}; ortso.AppendExecutionProvider_TensorRT_V2(params); } else { OrtTensorRTProviderOptionsV2* ep_option = nullptr; diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index df9a0bd190881..2d38bf7b4b3ba 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ -155,6 +155,7 @@ void RunWithOneSessionSingleThreadInference(std::string model_name, std::string 0, 0, 0, + 0, 0}; params.trt_engine_cache_enable = 1; @@ -228,6 +229,7 @@ void RunWithOneSessionMultiThreadsInference(std::string model_name, std::string 0, 0, 0, + 0, 0}; params.trt_engine_cache_enable = 1; @@ -394,6 +396,7 @@ TEST_P(TensorrtExecutionProviderCacheTest, Run) { 0, 0, 0, + 0, 0}; if (cache_type.compare("engine") == 0) { diff --git a/onnxruntime/test/util/default_providers.cc b/onnxruntime/test/util/default_providers.cc index d68d2512f32cf..55db973b6fce8 100644 --- a/onnxruntime/test/util/default_providers.cc +++ b/onnxruntime/test/util/default_providers.cc @@ -41,6 +41,7 @@ std::unique_ptr DefaultTensorrtExecutionProvider() { 0, 0, 0, + 0, nullptr, 0, nullptr, From 6b0be1db5fb68f9efe24697959f320219c8cfa09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20M=C3=BCller?= Date: Wed, 1 Mar 2023 18:59:50 +0100 Subject: [PATCH 28/30] format changes and adding force timing cache to provider options --- .../tensorrt/tensorrt_execution_provider.cc | 130 +++++++++--------- 1 file changed, 62 insertions(+), 68 deletions(-) diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index e962e5ef52047..10c67963b85cc 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -118,34 +118,30 @@ bool SetDynamicRange(nvinfer1::INetworkDefinition& 
network, std::unordered_map loadTimingCacheFile(const std::string inFileName) -{ - std::ifstream iFile(inFileName, std::ios::in | std::ios::binary); - if (!iFile) - { - LOGS_DEFAULT(WARNING) << "[TensorRT EP] Could not read timing cache from: " << inFileName - << ". A new timing cache will be generated and written."; - return std::vector(); - } - iFile.seekg(0, std::ifstream::end); - size_t fsize = iFile.tellg(); - iFile.seekg(0, std::ifstream::beg); - std::vector content(fsize); - iFile.read(content.data(), fsize); - iFile.close(); - return content; +inline std::vector loadTimingCacheFile(const std::string inFileName) { + std::ifstream iFile(inFileName, std::ios::in | std::ios::binary); + if (!iFile) { + LOGS_DEFAULT(WARNING) << "[TensorRT EP] Could not read timing cache from: " << inFileName + << ". A new timing cache will be generated and written."; + return std::vector(); + } + iFile.seekg(0, std::ifstream::end); + size_t fsize = iFile.tellg(); + iFile.seekg(0, std::ifstream::beg); + std::vector content(fsize); + iFile.read(content.data(), fsize); + iFile.close(); + return content; } -inline void saveTimingCacheFile(const std::string outFileName, const nvinfer1::IHostMemory* blob) -{ - std::ofstream oFile(outFileName, std::ios::out | std::ios::binary); - if (!oFile) - { - LOGS_DEFAULT(WARNING) << "[TensorRT EP] Could not write timing cache to: " << outFileName; - return; - } - oFile.write((char*) blob->data(), blob->size()); - oFile.close(); +inline void saveTimingCacheFile(const std::string outFileName, const nvinfer1::IHostMemory* blob) { + std::ofstream oFile(outFileName, std::ios::out | std::ios::binary); + if (!oFile) { + LOGS_DEFAULT(WARNING) << "[TensorRT EP] Could not write timing cache to: " << outFileName; + return; + } + oFile.write((char*)blob->data(), blob->size()); + oFile.close(); } } // namespace @@ -343,6 +339,7 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv dump_subgraphs_ = info.dump_subgraphs; engine_cache_enable_ = info.engine_cache_enable; timing_cache_enable_ = info.timing_cache_enable; + force_timing_cache_match_ = info.force_timing_cache; detailed_build_log_ = info.detailed_build_log; if (engine_cache_enable_ || int8_enable_ || timing_cache_enable_) { cache_path_ = info.engine_cache_path; @@ -1473,8 +1470,8 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector timing_cache = nullptr; - if (timing_cache_enable_) { + std::unique_ptr timing_cache = nullptr; + if (timing_cache_enable_) { std::vector loaded_timing_cache = loadTimingCacheFile(timing_cache_path); timing_cache.reset(trt_config->createTimingCache(static_cast(loaded_timing_cache.data()), loaded_timing_cache.size())); if (timing_cache == nullptr) { @@ -1485,9 +1482,9 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector(engine_build_stop - engine_build_start).count() << "ms" << std::endl; + std::chrono::duration_cast(engine_build_stop - engine_build_start).count() << "ms" << std::endl; } if (engine_cache_enable_) { std::unique_ptr serializedModel(trt_engine->serialize()); @@ -1518,18 +1515,17 @@ common::Status TensorrtExecutionProvider::Compile(const std::vectorgetTimingCache(); - std::unique_ptr timingCacheHostData{timing_cache->serialize()}; - if (timingCacheHostData == nullptr) { - return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, - "TensorRT EP could not serialize timing cache: " + timing_cache_path); - } - saveTimingCacheFile(timing_cache_path, timingCacheHostData.get()); - if (detailed_build_log_) { - LOGS_DEFAULT(VERBOSE) << 
"[TensorRT EP] Serialized timing cache " + timing_cache_path; - } + if (timing_cache_enable_) { + auto timing_cache = trt_config->getTimingCache(); + std::unique_ptr timingCacheHostData{timing_cache->serialize()}; + if (timingCacheHostData == nullptr) { + return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, + "TensorRT EP could not serialize timing cache: " + timing_cache_path); + } + saveTimingCacheFile(timing_cache_path, timingCacheHostData.get()); + if (detailed_build_log_) { + LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Serialized timing cache " + timing_cache_path; + } } } } @@ -1880,16 +1876,16 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector timing_cache = nullptr; if (trt_state->timing_cache_enable) { - std::vector loaded_timing_cache = loadTimingCacheFile(timing_cache_path); - timing_cache.reset(trt_config->createTimingCache(static_cast(loaded_timing_cache.data()), loaded_timing_cache.size())); - if (timing_cache == nullptr) { - return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, - "TensorRT EP could not create timing cache: " + timing_cache_path); - } - trt_config->setTimingCache(*timing_cache, force_timing_cache_match_); - if (detailed_build_log_) { - LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Deserialized timing cache from " + timing_cache_path; - } + std::vector loaded_timing_cache = loadTimingCacheFile(timing_cache_path); + timing_cache.reset(trt_config->createTimingCache(static_cast(loaded_timing_cache.data()), loaded_timing_cache.size())); + if (timing_cache == nullptr) { + return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, + "TensorRT EP could not create timing cache: " + timing_cache_path); + } + trt_config->setTimingCache(*timing_cache, force_timing_cache_match_); + if (detailed_build_log_) { + LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Deserialized timing cache from " + timing_cache_path; + } } // Build engine @@ -1903,8 +1899,8 @@ common::Status TensorrtExecutionProvider::Compile(const std::vectorbuildEngineWithConfig(*trt_state->network->get(), *trt_config)); if (detailed_build_log_) { auto engine_build_stop = std::chrono::steady_clock::now(); - LOGS_DEFAULT(INFO) << "TensorRT engine build for " << trt_state->trt_node_name_with_precision << " took: " << - std::chrono::duration_cast(engine_build_stop - engine_build_start).count() << "ms" << std::endl; + LOGS_DEFAULT(INFO) << "TensorRT engine build for " << trt_state->trt_node_name_with_precision << " took: " << + std::chrono::duration_cast(engine_build_stop - engine_build_start).count() << "ms" << std::endl; } } if (trt_state->engine == nullptr) { @@ -1932,21 +1928,19 @@ common::Status TensorrtExecutionProvider::Compile(const std::vectortiming_cache_enable) - { - auto timing_cache = trt_config->getTimingCache(); - std::unique_ptr timingCacheHostData{timing_cache->serialize()}; - if (timingCacheHostData == nullptr) { - return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, - "TensorRT EP could not serialize timing cache: " + timing_cache_path); - } - saveTimingCacheFile(timing_cache_path, timingCacheHostData.get()); - if (detailed_build_log_) { - LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Serialized timing cache " + timing_cache_path; - } + if (trt_state->timing_cache_enable) { + auto timing_cache = trt_config->getTimingCache(); + std::unique_ptr timingCacheHostData{timing_cache->serialize()}; + if (timingCacheHostData == nullptr) { + return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, + "TensorRT EP could not serialize timing cache: " + timing_cache_path); + } + saveTimingCacheFile(timing_cache_path, timingCacheHostData.get()); + if 
(detailed_build_log_) { + LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Serialized timing cache " + timing_cache_path; + } } - // Build context if (trt_state->context_memory_sharing_enable) { *(trt_state->context) = std::unique_ptr( From c1c3f712467b74673146e9f3f72c5cdbecd39267 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20M=C3=BCller?= Date: Mon, 6 Mar 2023 09:57:15 +0100 Subject: [PATCH 29/30] fix pybind OrtTensorRTProviderOptionsV2 --- onnxruntime/python/onnxruntime_pybind_state.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index dd24ce51e1111..667073063d4cc 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -366,6 +366,9 @@ std::unique_ptr CreateExecutionProviderInstance( nullptr, 0, 0, + 0, + 0, + 0, 0}; for (auto option : it->second) { if (option.first == "device_id") { From b888fc3ab888b418e270aa35cc2ee823aabf785d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20M=C3=BCller?= Date: Thu, 9 Mar 2023 00:12:27 +0100 Subject: [PATCH 30/30] reset OrtTensorRTProviderOptions --- include/onnxruntime/core/session/onnxruntime_c_api.h | 3 --- onnxruntime/test/util/default_providers.cc | 3 --- 2 files changed, 6 deletions(-) diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index 66e2e64f8c434..09cd8f0f748fc 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -530,9 +530,6 @@ typedef struct OrtTensorRTProviderOptions { int trt_dla_core; // DLA core number. Default 0 int trt_dump_subgraphs; // dump TRT subgraph. Default 0 = false, nonzero = true int trt_engine_cache_enable; // enable engine caching. Default 0 = false, nonzero = true - int trt_timing_cache_enable; // enable TensorRT timing cache. Default 0 = false, nonzero = true - int trt_force_timing_cache; // force the TensorRT cache to be used even if device profile does not match. Default 0 = false, nonzero = true - int trt_detailed_build_log; // Enable detailed build step logging on TensorRT EP with timing for each engine build. Default 0 = false, nonzero = true const char* trt_engine_cache_path; // specify engine cache path int trt_engine_decryption_enable; // enable engine decryption. Default 0 = false, nonzero = true const char* trt_engine_decryption_lib_path; // specify engine decryption library path diff --git a/onnxruntime/test/util/default_providers.cc b/onnxruntime/test/util/default_providers.cc index 55db973b6fce8..333203085ae77 100644 --- a/onnxruntime/test/util/default_providers.cc +++ b/onnxruntime/test/util/default_providers.cc @@ -39,9 +39,6 @@ std::unique_ptr DefaultTensorrtExecutionProvider() { 0, 0, 0, - 0, - 0, - 0, nullptr, 0, nullptr,
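
Taken together, the series exposes three new TensorRT EP switches: the timing cache itself, forced reuse of a timing cache recorded on a mismatching device, and a detailed per-engine build log. The snippet below is a minimal usage sketch, assuming a build that already contains these patches; the option keys are the ones registered in tensorrt_execution_provider_info.cc above, and the cache directory is a placeholder.

// Sketch: enable the TensorRT timing cache, forced timing-cache reuse, and
// detailed build logging through the OrtTensorRTProviderOptionsV2 key/value
// interface (assumes this patch series is applied; paths are placeholders).
#include <cstdio>
#include <cstdlib>
#include "onnxruntime_c_api.h"

static void Check(const OrtApi* api, OrtStatus* status) {
  if (status != nullptr) {
    std::fprintf(stderr, "%s\n", api->GetErrorMessage(status));
    api->ReleaseStatus(status);
    std::exit(1);
  }
}

int main() {
  const OrtApi* api = OrtGetApiBase()->GetApi(ORT_API_VERSION);

  OrtSessionOptions* session_options = nullptr;
  Check(api, api->CreateSessionOptions(&session_options));

  OrtTensorRTProviderOptionsV2* trt_options = nullptr;
  Check(api, api->CreateTensorRTProviderOptions(&trt_options));

  // Keys as registered in tensorrt_execution_provider_info.cc by this series.
  const char* const keys[] = {"trt_engine_cache_enable", "trt_engine_cache_path",
                              "trt_timing_cache_enable", "trt_force_timing_cache_match",
                              "trt_detailed_build_log"};
  const char* const values[] = {"1", "/tmp/ort_trt_cache", "1", "1", "1"};
  Check(api, api->UpdateTensorRTProviderOptions(trt_options, keys, values, 5));

  Check(api, api->SessionOptionsAppendExecutionProvider_TensorRT_V2(session_options, trt_options));

  // ... create the OrtSession from a model and run inference as usual ...

  api->ReleaseTensorRTProviderOptions(trt_options);
  api->ReleaseSessionOptions(session_options);
  return 0;
}

The forced-reuse flag ends up as the ignoreMismatch argument of nvinfer1::IBuilderConfig::setTimingCache, so a cache serialized on a different device can still be deserialized. The bundled perf test accepts the same switches (trt_timing_cache_enable, trt_force_timing_cache, trt_detailed_build_log) through the runtime-option parsing added to ort_test_session.cc above.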
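
GetTimingCachePath folds the GPU compute capability into the timing-cache file name because a TensorRT timing cache is only reusable on devices of the same capability; naming the file per capability keeps a stale cache from being fed back to TensorRT. Below is a standalone sketch of that naming scheme; the directory is a placeholder and the path is assembled directly rather than through GetCachePath.

// Sketch of the compute-capability-based timing cache file name used above.
// On an sm_80 device this prints ".../TensorrtExecutionProvider_cache_cc80.timing".
#include <cuda_runtime_api.h>
#include <iostream>
#include <string>

int main() {
  cudaDeviceProp prop{};
  if (cudaGetDeviceProperties(&prop, /*device=*/0) != cudaSuccess) {
    std::cerr << "no CUDA device visible\n";
    return 1;
  }
  const std::string root = "/tmp/ort_trt_cache";  // placeholder cache directory
  const std::string timing_cache_name =
      "TensorrtExecutionProvider_cache_cc" +
      std::to_string(prop.major * 10 + prop.minor) + ".timing";
  std::cout << root + "/" + timing_cache_name << "\n";
  return 0;
}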
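
Patch 26 reports the warm-up run separately because, with engine and timing caches enabled, the first inference is where cache (de)serialization and any engine build actually happen. A minimal sketch of that measurement follows, with the Run call stubbed out as a placeholder.

// Sketch: time the first (warm-up) inference separately, mirroring the
// "First inference time cost" line added to the perf runner output.
#include <chrono>
#include <iostream>

template <typename Fn>
double TimeFirstInferenceMs(Fn&& run_once) {
  const auto start = std::chrono::high_resolution_clock::now();
  run_once();  // e.g. a lambda that calls session.Run(...) exactly once
  const auto end = std::chrono::high_resolution_clock::now();
  return std::chrono::duration<double, std::milli>(end - start).count();
}

int main() {
  const double ms = TimeFirstInferenceMs([] { /* session.Run(...) goes here */ });
  std::cout << "First inference time cost: " << ms << " ms\n";
  return 0;
}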