Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions onnxruntime/core/platform/linux/device_discovery.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <fstream>
#include <iterator>
#include <optional>
#include <regex>
#include <string_view>

#include "core/common/common.h"
Expand Down Expand Up @@ -114,6 +115,28 @@
return std::nullopt;
}

// Determines the PCI bus id (e.g. "0000:65:00.0") of the device described by `sysfs_path`.
//
// The `device` entry under `sysfs_path` is resolved with std::filesystem::canonical and the final
// component of the resolved path is taken as the candidate PCI bus id.
//
// @param sysfs_path  Sysfs directory of the device; its `device` entry is resolved.
// @param pci_bus_id  Output. Set to the PCI bus id when the resolved filename matches the expected
//                    `domain:bus:device.function` pattern (lowercase hex); reset to empty otherwise.
//                    Left untouched when an error status is returned.
// @return An error status if the `device` entry cannot be canonicalized, Status::OK() otherwise.
//         A filename that does not match the pattern is not an error; it is only logged as a warning.
Status GetPciBusId(const std::filesystem::path& sysfs_path, std::optional<std::string>& pci_bus_id) {
  constexpr const char* regex_pattern{R"([0-9a-f]+:[0-9a-f]+:[0-9a-f]+[.][0-9a-f]+)"};
  // Compiled once; std::regex construction is expensive.
  static const std::regex pci_bus_id_regex(regex_pattern);

  std::error_code error_code;
  // Resolves the symlink to the PCI device directory, whose name is the PCI bus id, e.g. 0000:65:00.0
  const auto pci_bus_id_path = std::filesystem::canonical(sysfs_path / "device", error_code);
  ORT_RETURN_IF_ERROR(ErrorCodeToStatus(error_code));

  // Convert to a string once. Besides avoiding a second conversion, this keeps the log message below
  // from being double-quoted: streaming a std::filesystem::path adds its own surrounding quotes.
  std::string pci_bus_id_filename = pci_bus_id_path.filename().string();

  if (std::regex_match(pci_bus_id_filename, pci_bus_id_regex)) {
    pci_bus_id = std::move(pci_bus_id_filename);
  } else {
    pci_bus_id.reset();
    LOGS_DEFAULT(WARNING) << MakeString("Skipping pci_bus_id for PCI path at \"",
                                        pci_bus_id_path.string(),
                                        "\" because filename \"", pci_bus_id_filename,
                                        "\" did not match expected pattern of ",
                                        regex_pattern);
  }

  return Status::OK();
}

Status GetGpuDeviceFromSysfs(const GpuSysfsPathInfo& path_info, OrtHardwareDevice& gpu_device_out) {
OrtHardwareDevice gpu_device{};
const auto& sysfs_path = path_info.path;
Expand All @@ -140,6 +163,12 @@
gpu_device.metadata.Add("Discrete", (*is_gpu_discrete ? "1" : "0"));
}

std::optional<std::string> pci_bus_id;

Check warning on line 166 in onnxruntime/core/platform/linux/device_discovery.cc

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Add #include <string> for string [build/include_what_you_use] [4] Raw Output: onnxruntime/core/platform/linux/device_discovery.cc:166: Add #include <string> for string [build/include_what_you_use] [4]
ORT_RETURN_IF_ERROR(GetPciBusId(sysfs_path, pci_bus_id));
if (pci_bus_id) {
gpu_device.metadata.Add("pci_bus_id", std::move(*pci_bus_id));
}

gpu_device.type = OrtHardwareDeviceType_GPU;

gpu_device_out = std::move(gpu_device);
Expand Down
20 changes: 20 additions & 0 deletions onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_factory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -584,6 +584,7 @@ struct NvTensorRtRtxEpFactory : OrtEpFactory {
* @return True if the device is a supported NVIDIA GPU, false otherwise.
*/
bool IsOrtHardwareDeviceSupported(const OrtHardwareDevice& device) {
#if _WIN32
const auto& metadata_entries = device.metadata.Entries();
const auto it = metadata_entries.find("LUID");
if (it == metadata_entries.end()) {
Expand Down Expand Up @@ -625,6 +626,25 @@ struct NvTensorRtRtxEpFactory : OrtEpFactory {
}

return false;
#else
const auto& metadata_entries = device.metadata.Entries();
const auto it = metadata_entries.find("pci_bus_id");
if (it == metadata_entries.end()) {
return false;
}
auto& target_id = it->second;
int cuda_device_idx = 0;
if (cudaDeviceGetByPCIBusId(&cuda_device_idx, target_id.c_str()) != cudaSuccess) {
return false;
}

cudaDeviceProp prop;
if (cudaGetDeviceProperties(&prop, cuda_device_idx) != cudaSuccess) {
return false;
}
// Ampere architecture or newer is required.
return prop.major >= 8;
#endif
}

// Creates and returns OrtEpDevice instances for all OrtHardwareDevices that this factory supports.
Expand Down
2 changes: 2 additions & 0 deletions onnxruntime/core/providers/nv_tensorrt_rtx/version_script.lds
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
VERS_1.0 {
global:
GetProvider;
CreateEpFactories;
ReleaseEpFactory;

# Hide everything else.
local:
Expand Down
8 changes: 4 additions & 4 deletions onnxruntime/test/providers/nv_tensorrt_rtx/nv_basic_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,6 @@ INSTANTIATE_TEST_SUITE_P(NvExecutionProviderTest, TypeTests,
),
[](const testing::TestParamInfo<TypeTests::ParamType>& info) { return getTypeAsName(info.param); });

#ifdef _WIN32
static bool SessionHasEp(Ort::Session& session, const char* ep_name) {
// Access the underlying InferenceSession.
const OrtSession* ort_session = session;
Expand All @@ -295,7 +294,6 @@ static bool SessionHasEp(Ort::Session& session, const char* ep_name) {
}

// Tests autoEP feature to automatically select an EP that supports the GPU.
// Currently only works on Windows.
TEST(NvExecutionProviderTest, AutoEp_PreferGpu) {
PathString model_name = ORT_TSTR("nv_execution_provider_auto_ep.onnx");
std::string graph_name = "test";
Expand All @@ -305,7 +303,11 @@ TEST(NvExecutionProviderTest, AutoEp_PreferGpu) {
CreateBaseModel(model_name, graph_name, dims);

{
#if _WIN32
ort_env->RegisterExecutionProviderLibrary(kNvTensorRTRTXExecutionProvider, ORT_TSTR("onnxruntime_providers_nv_tensorrt_rtx.dll"));
#else
ort_env->RegisterExecutionProviderLibrary(kNvTensorRTRTXExecutionProvider, ORT_TSTR("libonnxruntime_providers_nv_tensorrt_rtx.so"));
#endif

Ort::SessionOptions so;
so.SetEpSelectionPolicy(OrtExecutionProviderDevicePolicy_PREFER_GPU);
Expand Down Expand Up @@ -602,7 +604,5 @@ TEST(NvExecutionProviderTest, FP4CustomOpModel) {
LOGS_DEFAULT(INFO) << "[NvExecutionProviderTest] TRT FP4 dynamic quantize model run completed successfully";
}

#endif

} // namespace test
} // namespace onnxruntime
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ namespace test {

RegisteredEpDeviceUniquePtr AppendTrtEtxEP(Ort::SessionOptions& session_options, std::unordered_map<std::string, std::string>& option_map) {
RegisteredEpDeviceUniquePtr nv_tensorrt_rtx_ep;
#ifdef _WIN32
/// Since this test runs after other tests that use registration interface this test has to use it as well
/// windows as otherwise the kernel registry inside the EP will not be populated. The legacy APIs only call the initialize once.
Utils::RegisterAndGetNvTensorRtRtxEp(*ort_env, nv_tensorrt_rtx_ep);
Expand All @@ -26,9 +25,6 @@ RegisteredEpDeviceUniquePtr AppendTrtEtxEP(Ort::SessionOptions& session_options,
}
}
session_options.AppendExecutionProvider_V2(*ort_env, {selected_device}, option_map);
#else
session_options.AppendExecutionProvider(onnxruntime::kNvTensorRTRTXExecutionProvider, option_map);
#endif
return nv_tensorrt_rtx_ep;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@

namespace onnxruntime {
namespace test {
#ifdef _WIN32

Utils::NvTensorRtRtxEpInfo Utils::nv_tensorrt_rtx_ep_info;

Expand Down Expand Up @@ -61,7 +60,6 @@ void Utils::RegisterAndGetNvTensorRtRtxEp(Ort::Env& env, RegisteredEpDeviceUniqu
c_api.UnregisterExecutionProviderLibrary(env, nv_tensorrt_rtx_ep_info.registration_name.c_str());
});
}
#endif // _WIN32

void CreateBaseModel(const PathString& model_name,
std::string graph_name,
Expand Down
Loading