diff --git a/onnxruntime/core/platform/linux/device_discovery.cc b/onnxruntime/core/platform/linux/device_discovery.cc
index e9c45a6966ef8..db6ac73996863 100644
--- a/onnxruntime/core/platform/linux/device_discovery.cc
+++ b/onnxruntime/core/platform/linux/device_discovery.cc
@@ -7,6 +7,7 @@
 #include
 #include
 #include
+#include <regex>
 #include
 
 #include "core/common/common.h"
@@ -114,6 +115,32 @@ std::optional IsGpuDiscrete(uint16_t vendor_id, uint16_t device_id) {
   return std::nullopt;
 }
 
+// Determines the PCI bus id of the device at `sysfs_path` by resolving its "device" symlink.
+// On return with an OK status, `pci_bus_id` holds the id (e.g. 0000:65:00.0), or is empty (and a warning is logged)
+// when the resolved directory name does not match the PCI bus id pattern. A failure to resolve the symlink is
+// propagated through ErrorCodeToStatus.
+Status GetPciBusId(const std::filesystem::path& sysfs_path, std::optional<std::string>& pci_bus_id) {
+  constexpr const char* regex_pattern{R"([0-9a-f]+:[0-9a-f]+:[0-9a-f]+[.][0-9a-f]+)"};
+  static const std::regex pci_bus_id_regex(regex_pattern);
+
+  std::error_code error_code;
+  auto pci_bus_id_path = std::filesystem::canonical(sysfs_path / "device", error_code);  // resolves symlink to PCI bus id, e.g. 0000:65:00.0
+  ORT_RETURN_IF_ERROR(ErrorCodeToStatus(error_code));
+
+  // Convert to a plain string once; std::filesystem::path's stream operator would write a quoted form in the log.
+  auto pci_bus_id_filename = pci_bus_id_path.filename().string();
+  if (std::regex_match(pci_bus_id_filename, pci_bus_id_regex)) {
+    pci_bus_id = std::move(pci_bus_id_filename);
+  } else {
+    pci_bus_id = {};
+    LOGS_DEFAULT(WARNING) << MakeString("Skipping pci_bus_id for PCI path at \"",
+                                        pci_bus_id_path.string(),
+                                        "\" because filename \"", pci_bus_id_filename, "\" did not match expected pattern of ",
+                                        regex_pattern);
+  }
+
+  return Status::OK();
+}
+
 Status GetGpuDeviceFromSysfs(const GpuSysfsPathInfo& path_info, OrtHardwareDevice& gpu_device_out) {
   OrtHardwareDevice gpu_device{};
   const auto& sysfs_path = path_info.path;
@@ -140,6 +167,12 @@ Status GetGpuDeviceFromSysfs(const GpuSysfsPathInfo& path_info, OrtHardwareDevic
     gpu_device.metadata.Add("Discrete", (*is_gpu_discrete ?
"1" : "0")); } + std::optional pci_bus_id; + ORT_RETURN_IF_ERROR(GetPciBusId(sysfs_path, pci_bus_id)); + if (pci_bus_id) { + gpu_device.metadata.Add("pci_bus_id", std::move(*pci_bus_id)); + } + gpu_device.type = OrtHardwareDeviceType_GPU; gpu_device_out = std::move(gpu_device); diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_factory.cc b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_factory.cc index e5015e705958d..9955e73bf69ad 100644 --- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_factory.cc +++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_factory.cc @@ -584,6 +584,7 @@ struct NvTensorRtRtxEpFactory : OrtEpFactory { * @return True if the device is a supported NVIDIA GPU, false otherwise. */ bool IsOrtHardwareDeviceSupported(const OrtHardwareDevice& device) { +#if _WIN32 const auto& metadata_entries = device.metadata.Entries(); const auto it = metadata_entries.find("LUID"); if (it == metadata_entries.end()) { @@ -625,6 +626,25 @@ struct NvTensorRtRtxEpFactory : OrtEpFactory { } return false; +#else + const auto& metadata_entries = device.metadata.Entries(); + const auto it = metadata_entries.find("pci_bus_id"); + if (it == metadata_entries.end()) { + return false; + } + auto& target_id = it->second; + int cuda_device_idx = 0; + if (cudaDeviceGetByPCIBusId(&cuda_device_idx, target_id.c_str()) != cudaSuccess) { + return false; + } + + cudaDeviceProp prop; + if (cudaGetDeviceProperties(&prop, cuda_device_idx) != cudaSuccess) { + return false; + } + // Ampere architecture or newer is required. + return prop.major >= 8; +#endif } // Creates and returns OrtEpDevice instances for all OrtHardwareDevices that this factory supports. 
diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/version_script.lds b/onnxruntime/core/providers/nv_tensorrt_rtx/version_script.lds index 094abb3329781..251e39e089275 100644 --- a/onnxruntime/core/providers/nv_tensorrt_rtx/version_script.lds +++ b/onnxruntime/core/providers/nv_tensorrt_rtx/version_script.lds @@ -2,6 +2,8 @@ VERS_1.0 { global: GetProvider; + CreateEpFactories; + ReleaseEpFactory; # Hide everything else. local: diff --git a/onnxruntime/test/providers/nv_tensorrt_rtx/nv_basic_test.cc b/onnxruntime/test/providers/nv_tensorrt_rtx/nv_basic_test.cc index c96282e6d7350..a54c35accbdc7 100644 --- a/onnxruntime/test/providers/nv_tensorrt_rtx/nv_basic_test.cc +++ b/onnxruntime/test/providers/nv_tensorrt_rtx/nv_basic_test.cc @@ -278,7 +278,6 @@ INSTANTIATE_TEST_SUITE_P(NvExecutionProviderTest, TypeTests, ), [](const testing::TestParamInfo& info) { return getTypeAsName(info.param); }); -#ifdef _WIN32 static bool SessionHasEp(Ort::Session& session, const char* ep_name) { // Access the underlying InferenceSession. const OrtSession* ort_session = session; @@ -295,7 +294,6 @@ static bool SessionHasEp(Ort::Session& session, const char* ep_name) { } // Tests autoEP feature to automatically select an EP that supports the GPU. -// Currently only works on Windows. 
TEST(NvExecutionProviderTest, AutoEp_PreferGpu) { PathString model_name = ORT_TSTR("nv_execution_provider_auto_ep.onnx"); std::string graph_name = "test"; @@ -305,7 +303,11 @@ TEST(NvExecutionProviderTest, AutoEp_PreferGpu) { CreateBaseModel(model_name, graph_name, dims); { +#if _WIN32 ort_env->RegisterExecutionProviderLibrary(kNvTensorRTRTXExecutionProvider, ORT_TSTR("onnxruntime_providers_nv_tensorrt_rtx.dll")); +#else + ort_env->RegisterExecutionProviderLibrary(kNvTensorRTRTXExecutionProvider, ORT_TSTR("libonnxruntime_providers_nv_tensorrt_rtx.so")); +#endif Ort::SessionOptions so; so.SetEpSelectionPolicy(OrtExecutionProviderDevicePolicy_PREFER_GPU); @@ -602,7 +604,5 @@ TEST(NvExecutionProviderTest, FP4CustomOpModel) { LOGS_DEFAULT(INFO) << "[NvExecutionProviderTest] TRT FP4 dynamic quantize model run completed successfully"; } -#endif - } // namespace test } // namespace onnxruntime diff --git a/onnxruntime/test/providers/nv_tensorrt_rtx/nv_ep_context_test.cc b/onnxruntime/test/providers/nv_tensorrt_rtx/nv_ep_context_test.cc index ac24dcb70c1dd..bcdfd18407ca8 100644 --- a/onnxruntime/test/providers/nv_tensorrt_rtx/nv_ep_context_test.cc +++ b/onnxruntime/test/providers/nv_tensorrt_rtx/nv_ep_context_test.cc @@ -14,7 +14,6 @@ namespace test { RegisteredEpDeviceUniquePtr AppendTrtEtxEP(Ort::SessionOptions& session_options, std::unordered_map& option_map) { RegisteredEpDeviceUniquePtr nv_tensorrt_rtx_ep; -#ifdef _WIN32 /// Since this test runs after other tests that use registration interface this test has to use it as well /// windows as otherwise the kernel registry inside the EP will not be populated. The legacy APis ony call the initialize once. 
Utils::RegisterAndGetNvTensorRtRtxEp(*ort_env, nv_tensorrt_rtx_ep); @@ -26,9 +25,6 @@ RegisteredEpDeviceUniquePtr AppendTrtEtxEP(Ort::SessionOptions& session_options, } } session_options.AppendExecutionProvider_V2(*ort_env, {selected_device}, option_map); -#else - session_options.AppendExecutionProvider(onnxruntime::kNvTensorRTRTXExecutionProvider, option_map); -#endif return nv_tensorrt_rtx_ep; } diff --git a/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc b/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc index 47127399b4646..de028bf613a27 100644 --- a/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc +++ b/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc @@ -24,7 +24,6 @@ namespace onnxruntime { namespace test { -#ifdef _WIN32 Utils::NvTensorRtRtxEpInfo Utils::nv_tensorrt_rtx_ep_info; @@ -61,7 +60,6 @@ void Utils::RegisterAndGetNvTensorRtRtxEp(Ort::Env& env, RegisteredEpDeviceUniqu c_api.UnregisterExecutionProviderLibrary(env, nv_tensorrt_rtx_ep_info.registration_name.c_str()); }); } -#endif // _WIN32 void CreateBaseModel(const PathString& model_name, std::string graph_name,