@@ -1778,7 +1778,12 @@ SubGraphCollection_t NvExecutionProvider::GetSupportedList(SubGraphCollection_t
SubGraphCollection_t parser_nodes_list;
TensorrtLogger& trt_logger = GetTensorrtLogger(detailed_build_log_);
auto trt_builder = GetBuilder(trt_logger);
+#if TRT_MAJOR_RTX >= 2 || (TRT_MAJOR_RTX == 1 && ((TRT_MINOR_RTX == 5 && TRT_BUILD_RTX >= 97) || TRT_MINOR_RTX >= 6))
+// kSTRONGLY_TYPED == 0 => bit flag value is 1U. Use literal to avoid deprecated-enum warning (deprecated since 1.5.0.97).
+constexpr uint32_t network_flags = 1U;
+#else
auto network_flags = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kSTRONGLY_TYPED);
+#endif
auto trt_network = std::unique_ptr<nvinfer1::INetworkDefinition>(trt_builder->createNetworkV2(network_flags));

bool is_model_supported = false;
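The guarded block above works because the strongly typed creation flag occupies bit 0, so the literal `1U` and the shifted enum value produce the same mask. A minimal, self-contained sketch of that equivalence, using a stand-in enum rather than the real `nvinfer1::NetworkDefinitionCreationFlag` (the underlying value 0 is taken from the comment in the change):

```cpp
// Sketch only: a stand-in enum showing why the literal 1U equals the shifted flag value.
// Assumes kSTRONGLY_TYPED == 0, as stated in the comment of the change above.
#include <cstdint>

enum class NetworkDefinitionCreationFlag : std::int32_t { kSTRONGLY_TYPED = 0 };

int main() {
  constexpr std::uint32_t from_enum =
      1U << static_cast<std::uint32_t>(NetworkDefinitionCreationFlag::kSTRONGLY_TYPED);
  constexpr std::uint32_t literal = 1U;  // what the gated code passes on TRT-RTX >= 1.5.0.97
  static_assert(from_enum == literal, "bit 0 is set either way");
  return 0;
}
```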
@@ -2713,7 +2718,12 @@ Status NvExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphViewer& gr

TensorrtLogger& trt_logger = GetTensorrtLogger(detailed_build_log_);
auto trt_builder = GetBuilder(trt_logger);
+#if TRT_MAJOR_RTX >= 2 || (TRT_MAJOR_RTX == 1 && ((TRT_MINOR_RTX == 5 && TRT_BUILD_RTX >= 97) || TRT_MINOR_RTX >= 6))
+// kSTRONGLY_TYPED == 0 => bit flag value is 1U. Use literal to avoid deprecated-enum warning (deprecated since 1.5.0.97).
+constexpr uint32_t network_flags = 1U;
+#else
auto network_flags = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kSTRONGLY_TYPED);
+#endif
auto trt_network = std::unique_ptr<nvinfer1::INetworkDefinition>(trt_builder->createNetworkV2(network_flags));
auto trt_config = std::unique_ptr<nvinfer1::IBuilderConfig>(trt_builder->createBuilderConfig());
auto trt_parser = tensorrt_ptr::unique_pointer<nvonnxparser::IParser>(nvonnxparser::createParser(*trt_network, trt_logger));
@@ -4,6 +4,8 @@

#pragma once
#include <ctime>
+#include <utility>
+#include <vector>
#ifndef USE_CUDA_MINIMAL
#include <cudnn.h>
#else
@@ -136,6 +138,13 @@ class OutputAllocator : public nvinfer1::IOutputAllocator {
*/
using ShapeRangesMap = std::unordered_map<std::string, std::unordered_map<size_t, std::vector<std::vector<int64_t>>>>;

+// SubGraph_t and SubGraphCollection_t were defined in NvOnnxParser.h up to TRT-RTX 1.5.x
+// but removed in 1.6.0. Define them here for 1.6+ so the provider owns these ORT-internal types.
+#if TRT_MINOR_RTX >= 6
+using SubGraph_t = std::pair<std::vector<size_t>, bool>;
+using SubGraphCollection_t = std::vector<SubGraph_t>;
+#endif

/**
* @brief Container for tensor data and their shape.
*
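For orientation, each `SubGraph_t` pairs a list of node indices with a flag saying whether that cluster can run on TensorRT, and `SubGraphCollection_t` is a vector of such clusters. A small illustration of how the types are shaped and consumed; the node indices and the `PrintClusters` helper are made up for the example and are not part of the provider:

```cpp
#include <cstddef>
#include <cstdio>
#include <utility>
#include <vector>

// Same shape as the typedefs introduced above.
using SubGraph_t = std::pair<std::vector<std::size_t>, bool>;
using SubGraphCollection_t = std::vector<SubGraph_t>;

// Hypothetical helper: dump each cluster and whether TensorRT can take it.
void PrintClusters(const SubGraphCollection_t& clusters) {
  for (const auto& [node_indices, supported] : clusters) {
    std::printf("%s cluster of %zu node(s)\n",
                supported ? "supported" : "unsupported", node_indices.size());
  }
}

int main() {
  SubGraphCollection_t clusters;
  clusters.push_back({{0, 1, 2}, true});  // nodes 0-2 can run on TensorRT
  clusters.push_back({{3}, false});       // node 3 falls back to other EPs
  PrintClusters(clusters);
  return 0;
}
```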
@@ -116,12 +116,16 @@ common::Status CreateTensorRTCustomOpDomainList(std::vector<OrtCustomOpDomain*>&
LOGS_DEFAULT(INFO) << "[NvTensorRTRTX EP] Default plugin library is not on the path and is therefore ignored";
}
try {
-int num_plugin_creator = 0;
-auto plugin_creators = getPluginRegistry()->getPluginCreatorList(&num_plugin_creator);
+// getAllCreators() is the TRT 10+ replacement for the removed getPluginCreatorList().
+// It returns IPluginCreatorInterface* const*; cast each entry to the deprecated-but-present
+// IPluginCreator* to access getPluginName()/getPluginVersion().
+int32_t num_plugin_creator = 0;
+auto plugin_creators = getPluginRegistry()->getAllCreators(&num_plugin_creator);
std::unordered_set<std::string> registered_plugin_names;

-for (int i = 0; i < num_plugin_creator; i++) {
-auto plugin_creator = plugin_creators[i];
+for (int32_t i = 0; i < num_plugin_creator; i++) {
+auto* plugin_creator = dynamic_cast<nvinfer1::IPluginCreator*>(plugin_creators[i]);
+if (!plugin_creator) continue;  // skip IPluginCreatorV3One entries that lack these accessors
std::string plugin_name(plugin_creator->getPluginName());
LOGS_DEFAULT(VERBOSE) << "[NvTensorRTRTX EP] " << plugin_name << ", version : " << plugin_creator->getPluginVersion();

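Since the hunk above interleaves the removed and added lines, the resulting enumeration pattern is easier to read in one place. The sketch below assumes the TensorRT 10-style registry API (`getAllCreators` returning `IPluginCreatorInterface* const*`); the `CollectV2PluginNames` wrapper is hypothetical and only mirrors the logic of the change:

```cpp
#include <string>
#include <unordered_set>
#include <NvInferRuntime.h>  // getPluginRegistry(), nvinfer1::IPluginCreator

// Hypothetical wrapper illustrating the post-change enumeration logic.
std::unordered_set<std::string> CollectV2PluginNames() {
  std::unordered_set<std::string> names;
  int32_t count = 0;
  auto creators = getPluginRegistry()->getAllCreators(&count);
  for (int32_t i = 0; i < count; ++i) {
    // Only the legacy IPluginCreator interface exposes getPluginName()/getPluginVersion()
    // directly; V3 creators are skipped here, mirroring the change above.
    if (auto* v2 = dynamic_cast<nvinfer1::IPluginCreator*>(creators[i])) {
      names.emplace(v2->getPluginName());
    }
  }
  return names;
}
```

@@ -158,12 +158,12 @@ ONNX_NAMESPACE::TensorProto CreateLargeWeight(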
std::vector<float> data(total_size);
std::normal_distribution<float> dist(0.0f, scale);
for (auto& v : data) v = dist(rng);
-onnxruntime::utils::SetRawDataInTensorProto(tensor, data.data(), total_size * sizeof(float));
+tensor.set_raw_data(data.data(), total_size * sizeof(float));
} else if (dtype == ONNX_NAMESPACE::TensorProto_DataType_FLOAT16) {
std::vector<MLFloat16> data(total_size);
std::normal_distribution<float> dist(0.0f, scale);
for (auto& v : data) v = MLFloat16(dist(rng));
-onnxruntime::utils::SetRawDataInTensorProto(tensor, data.data(), total_size * sizeof(MLFloat16));
+tensor.set_raw_data(data.data(), total_size * sizeof(MLFloat16));
} else {
throw std::runtime_error("Unsupported data type for large weight");
}
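The test hunks in this file all make the same swap: writing initializer bytes through protobuf's `set_raw_data(const void*, size_t)` overload instead of the `onnxruntime::utils::SetRawDataInTensorProto` helper. A minimal sketch of that pattern for a small FLOAT initializer; the `MakeFloatInitializer` name and the dimensions are illustrative only, and `ONNX_NAMESPACE` is assumed to resolve to the ONNX protobuf namespace as it does in the test file:

```cpp
#include <string>
#include <vector>
#include <onnx/onnx_pb.h>  // ONNX_NAMESPACE::TensorProto

// Illustrative helper: build a 2x3 FLOAT initializer and store its payload as raw bytes.
ONNX_NAMESPACE::TensorProto MakeFloatInitializer(const std::string& name) {
  ONNX_NAMESPACE::TensorProto tensor;
  tensor.set_name(name);
  tensor.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT);
  tensor.add_dims(2);
  tensor.add_dims(3);
  std::vector<float> data(2 * 3, 0.5f);
  // Same call shape as the edited test code: pointer plus byte count into raw_data.
  tensor.set_raw_data(data.data(), data.size() * sizeof(float));
  return tensor;
}
```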
@@ -582,7 +582,7 @@ void CreateFP8CustomOpModel(const PathString& model_name, const std::string& gra
scale_initializer.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT16);
// For FLOAT16, we need to convert the float value to MLFloat16 and store as raw data
MLFloat16 scale_value(0.0078125f);
-onnxruntime::utils::SetRawDataInTensorProto(scale_initializer, &scale_value, sizeof(MLFloat16));
+scale_initializer.set_raw_data(&scale_value, sizeof(MLFloat16));
graph.AddInitializedTensor(scale_initializer);

// Get the scale node arg
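The FP16 variants follow the same idea, but the scalar is converted to `MLFloat16` first so that exactly `sizeof(MLFloat16)` bytes land in `raw_data`. A hypothetical helper sketching that conversion; the `core/framework/float16.h` include path for `onnxruntime::MLFloat16` is an assumption:

```cpp
#include <string>
#include <onnx/onnx_pb.h>
#include "core/framework/float16.h"  // onnxruntime::MLFloat16 (include path is an assumption)

// Hypothetical helper: a scalar FLOAT16 initializer whose payload is one half-precision value.
ONNX_NAMESPACE::TensorProto MakeHalfScale(const std::string& name, float value) {
  ONNX_NAMESPACE::TensorProto scale;
  scale.set_name(name);
  scale.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT16);
  onnxruntime::MLFloat16 half_value(value);             // convert before storing
  scale.set_raw_data(&half_value, sizeof(half_value));  // exactly 2 bytes of raw data
  return scale;
}
```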
@@ -697,7 +697,7 @@ void CreateFP4CustomOpModel(const PathString& model_name, const std::string& gra
scale_initializer.set_name("scale");
scale_initializer.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT16);
MLFloat16 scale_value(0.1234f);
-onnxruntime::utils::SetRawDataInTensorProto(scale_initializer, &scale_value, sizeof(MLFloat16));
+scale_initializer.set_raw_data(&scale_value, sizeof(MLFloat16));
graph.AddInitializedTensor(scale_initializer);
auto* scale_arg = graph.GetNodeArg("scale");

@@ -756,7 +756,7 @@ void CreateFP4CustomOpModel(const PathString& model_name, const std::string& gra
dequant_scale_initializer.set_name("dequant_scale");
dequant_scale_initializer.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT16);
MLFloat16 dequant_scale_value(0.0625f);
-onnxruntime::utils::SetRawDataInTensorProto(dequant_scale_initializer, &dequant_scale_value, sizeof(MLFloat16));
+dequant_scale_initializer.set_raw_data(&dequant_scale_value, sizeof(MLFloat16));
graph.AddInitializedTensor(dequant_scale_initializer);

// Get the dequant_scale node arg