diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index 98d0b487d60b2..9eaf279b3a0eb 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -51,12 +51,19 @@ namespace perftest { "\t-d [cudnn_conv_algorithm]: Specify CUDNN convolution algothrithms: 0(benchmark), 1(heuristic), 2(default). \n" "\t-q: [CUDA only] use separate stream for copy. \n" "\t-z: Set denormal as zero. When turning on this option reduces latency dramatically, a model may have denormals.\n" + "\t-i: Specify EP specific runtime options as key value pairs. Different runtime options available are: \n" + "\t [OpenVINO only] [device_type]: Overrides the accelerator hardware type and precision with these values at runtime.\n" + "\t [OpenVINO only] [device_id]: Selects a particular hardware device for inference.\n" + "\t [OpenVINO only] [enable_vpu_fast_compile]: Optionally enabled to speed up the model's compilation on VPU device targets.\n" + "\t [OpenVINO only] [num_of_threads]: Overrides the accelerator default value of number of threads with this value at runtime.\n" + "\t [Usage]: -e <provider_name> -i '<key1>|<value1> <key2>|<value2>'\n\n" + "\t [Example] [For OpenVINO EP] -e openvino -i 'device_type|CPU_FP32 enable_vpu_fast_compile|true num_of_threads|5'\n" "\t-h: help\n"); } /*static*/ bool CommandLineParser::ParseArguments(PerformanceTestConfig& test_config, int argc, ORTCHAR_T* argv[]) { int ch; - while ((ch = getopt(argc, argv, ORT_TSTR("b:m:e:r:t:p:x:y:c:d:o:u:AMPIvhsqz"))) != -1) { + while ((ch = getopt(argc, argv, ORT_TSTR("b:m:e:r:t:p:x:y:c:d:o:u:i:AMPIvhsqz"))) != -1) { switch (ch) { case 'm': if (!CompareCString(optarg, ORT_TSTR("duration"))) { @@ -189,6 +196,9 @@ namespace perftest { case 'z': test_config.run_config.set_denormal_as_zero = true; break; + case 'i': + test_config.run_config.ep_runtime_config_string = optarg; // raw "key|value ..." string; parsed later by the selected execution provider branch + break; case '?': case 'h': default: diff --git 
a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index 6949b5911793c..a88f6e349cddb 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -68,7 +68,59 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device #endif } else if (provider_name == onnxruntime::kOpenVINOExecutionProvider) { #ifdef USE_OPENVINO - Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_OpenVINO(session_options, "")); + std::string device_type = ""; // [device_type]: Overrides the accelerator hardware type and precision with these values at runtime. + bool enable_vpu_fast_compile = false; // [enable_vpu_fast_compile]: Fast-compile may be optionally enabled to speed up the model's compilation to VPU device specific format. + std::string device_id = ""; // [device_id]: Selects a particular hardware device for inference. + size_t num_of_threads = 8; // [num_of_threads]: Overrides the accelerator default value of number of threads with this value at runtime. + + std::istringstream ss(performance_test_config.run_config.ep_runtime_config_string); + std::string token; + while (ss >> token) { // options are whitespace-separated "key|value" tokens + if(token == "") { + continue; + } + auto pos = token.find("|"); + if (pos == std::string::npos || pos == 0 || pos == token.length() - 1) { // reject a missing separator, an empty key, or an empty value + ORT_THROW("[ERROR] [OpenVINO] Use a '|' to separate the key and value for the run-time option you are trying to use.\n"); + } + + auto key = token.substr(0,pos); + auto value = token.substr(pos+1); + + if (key == "device_type") { + std::set<std::string> ov_supported_device_types = {"CPU_FP32", "GPU_FP32", "GPU_FP16", "VAD-M_FP16", "MYRIAD_FP16", "VAD-F_FP32"}; + if (ov_supported_device_types.find(value) != ov_supported_device_types.end()) { + device_type = value; + } + else { + ORT_THROW("[ERROR] [OpenVINO] You have selected wrong configuration value for the key 'device_type'. 
select from 'CPU_FP32', 'GPU_FP32', 'GPU_FP16', 'VAD-M_FP16', 'MYRIAD_FP16', 'VAD-F_FP32' or from Hetero/Multi options available. \n"); + } + } else if (key == "device_id") { + device_id = value; + } else if (key == "enable_vpu_fast_compile") { + if(value == "true" || value == "True"){ + enable_vpu_fast_compile = true; + } else if (value == "false" || value == "False") { + enable_vpu_fast_compile = false; + } else { + ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_vpu_fast_compile' should be a boolean i.e. true or false. Default value is false.\n"); + } + } else if (key == "num_of_threads") { + std::stringstream sstream(value); + sstream >> num_of_threads; + if (sstream.fail() || !sstream.eof() || value.find('-') != std::string::npos || num_of_threads == 0) { // reject non-numeric input, trailing characters, negatives (unsigned extraction would wrap them), and zero + ORT_THROW("[ERROR] [OpenVINO] The value for the key 'num_of_threads' should be greater than 0\n"); + } + } else { + ORT_THROW("[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO. ['device_type', 'device_id', 'enable_vpu_fast_compile', 'num_of_threads'] \n"); + } + } + OrtOpenVINOProviderOptions options; + options.device_type = device_type.c_str(); // To set the device_type; pointer stays valid only while device_type is in scope + options.device_id = device_id.c_str(); // To set the device_id; pointer stays valid only while device_id is in scope + options.enable_vpu_fast_compile = enable_vpu_fast_compile; // To enable_vpu_fast_compile, default is false + options.num_of_threads = num_of_threads; // To set number of free InferRequests, default is 8 + session_options.AppendExecutionProvider_OpenVINO(options); #else ORT_THROW("OpenVINO is not supported in this build\n"); #endif diff --git a/onnxruntime/test/perftest/test_configuration.h b/onnxruntime/test/perftest/test_configuration.h index 63771936c2be5..3205bbf9d55a7 100644 --- a/onnxruntime/test/perftest/test_configuration.h +++ b/onnxruntime/test/perftest/test_configuration.h @@ -53,6 +53,7 @@ struct RunConfig { int cudnn_conv_algo{0}; bool do_cuda_copy_in_separate_stream{false}; bool set_denormal_as_zero{false}; + std::basic_string<ORTCHAR_T> 
ep_runtime_config_string; }; struct PerformanceTestConfig {