4 changes: 4 additions & 0 deletions cmake/onnxruntime_providers_nv.cmake
@@ -1,4 +1,5 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Licensed under the MIT License.
find_package(CUDAToolkit REQUIRED 12.8)
enable_language(CUDA)
@@ -9,6 +10,9 @@
if (onnxruntime_NV_PLACEHOLDER_BUILDER)
add_definitions(-DORT_NV_PLACEHOLDER_BUILDER)
endif()
if (NOT onnxruntime_USE_TENSORRT_BUILTIN_PARSER)
message(FATAL_ERROR "TensorRT RTX cannot be used with the open source parser.")
endif ()
set(BUILD_LIBRARY_ONLY 1)
add_definitions("-DONNX_ML=1")
add_definitions("-DONNX_NAMESPACE=onnx")

@@ -21,6 +21,7 @@ struct OrtTensorRTProviderOptionsV2 {
int trt_min_subgraph_size{1}; // minimum size of TensorRT subgraphs
size_t trt_max_workspace_size{0}; // maximum workspace size for TensorRT. Default is 0 means max device memory size
int trt_fp16_enable{0}; // enable TensorRT FP16 precision. Default 0 = false, nonzero = true
int trt_bf16_enable{0}; // enable TensorRT BF16 precision. Default 0 = false, nonzero = true
int trt_int8_enable{0}; // enable TensorRT INT8 precision. Default 0 = false, nonzero = true
const char* trt_int8_calibration_table_name{nullptr}; // TensorRT INT8 calibration table name.
int trt_int8_use_native_calibration_table{0}; // use native TensorRT generated calibration table. Default 0 = false, nonzero = true
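The new field slots in next to the existing precision switches, so an application can request BF16 the same way it would FP16. A minimal sketch, assuming the standard ONNX Runtime C++ API (Ort::Env, Ort::SessionOptions and AppendExecutionProvider_TensorRT_V2 already exist; only trt_bf16_enable comes from this change, and "model.onnx" is a placeholder path):

// Sketch only: enable TensorRT BF16 through the V2 provider options struct.
#include <onnxruntime_cxx_api.h>

int main() {
  Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "trt-bf16-demo");
  Ort::SessionOptions session_options;

  OrtTensorRTProviderOptionsV2 trt_options{};
  trt_options.device_id = 0;
  trt_options.trt_bf16_enable = 1;  // nonzero = true, mirroring trt_fp16_enable

  session_options.AppendExecutionProvider_TensorRT_V2(trt_options);
  Ort::Session session(env, ORT_TSTR("model.onnx"), session_options);
  return 0;
}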
1 change: 1 addition & 0 deletions onnxruntime/core/providers/nv_tensorrt_rtx/nv_allocator.cc
@@ -1,4 +1,5 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Licensed under the MIT License.

#include "nv_allocator.h"

@@ -743,6 +743,7 @@ Status BindContextInput(Ort::KernelContext& ctx,
switch (tensor_type) {
CASE_GET_INPUT_TENSOR(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, float)
CASE_GET_INPUT_TENSOR(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16, uint16_t)
CASE_GET_INPUT_TENSOR(ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16, uint16_t)
CASE_GET_INPUT_TENSOR(ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL, bool)
CASE_GET_INPUT_TENSOR(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8, int8_t)
CASE_GET_INPUT_TENSOR(ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8, uint8_t)
@@ -829,6 +830,7 @@ Status BindContextOutput(Ort::KernelContext& ctx,
switch (output_type) {
CASE_GET_OUTPUT_TENSOR(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, float)
CASE_GET_OUTPUT_TENSOR(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16, uint16_t)
CASE_GET_OUTPUT_TENSOR(ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16, uint16_t)
CASE_GET_OUTPUT_TENSOR(ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL, bool)
CASE_GET_OUTPUT_TENSOR(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8, int8_t)
CASE_GET_OUTPUT_TENSOR(ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8, uint8_t)
@@ -892,6 +894,7 @@ Status BindKernelOutput(Ort::KernelContext& ctx,
switch (output_type) {
CASE_COPY_TENSOR(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, float)
CASE_COPY_TENSOR(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16, uint16_t)
CASE_COPY_TENSOR(ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16, uint16_t)
CASE_COPY_TENSOR(ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL, bool)
CASE_COPY_TENSOR(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8, int8_t)
CASE_COPY_TENSOR(ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8, uint8_t)
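Because BFLOAT16 is bound through the same uint16_t reinterpretation as FLOAT16 in the macros above, a caller can hand the provider raw BF16 bit patterns. A minimal sketch, assuming the standard Ort::Value::CreateTensor overload that takes an explicit element type; the helper name and shape handling are illustrative only:

// Sketch only: wrap BF16 data (stored as raw 16-bit words) in an Ort::Value.
#include <onnxruntime_cxx_api.h>
#include <cstdint>
#include <vector>

Ort::Value MakeBf16Input(std::vector<uint16_t>& bf16_bits, const std::vector<int64_t>& shape) {
  Ort::MemoryInfo mem_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
  // The tensor does not own the buffer; bf16_bits must outlive the returned value.
  return Ort::Value::CreateTensor(mem_info,
                                  bf16_bits.data(),
                                  bf16_bits.size() * sizeof(uint16_t),
                                  shape.data(), shape.size(),
                                  ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16);
}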
@@ -1,4 +1,5 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Licensed under the MIT License.

#pragma once
@@ -1,4 +1,5 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Licensed under the MIT License.

#include <unordered_set>
@@ -1,4 +1,5 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Licensed under the MIT License.

#pragma once
@@ -1,4 +1,5 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Licensed under the MIT License.

#include "core/providers/shared_library/provider_api.h"
@@ -1,4 +1,5 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Licensed under the MIT License.

#include "core/providers/nv_tensorrt_rtx/nv_execution_provider_info.h"
@@ -1,4 +1,5 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Licensed under the MIT License.

#pragma once
@@ -1,4 +1,5 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Licensed under the MIT License.

#include <fstream>
1 change: 1 addition & 0 deletions onnxruntime/core/providers/nv_tensorrt_rtx/nv_includes.h
@@ -1,4 +1,5 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Licensed under the MIT License.
#pragma once

@@ -1,4 +1,5 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Licensed under the MIT License.

#include "core/providers/shared_library/provider_api.h"
@@ -1,4 +1,5 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Licensed under the MIT License.

#include "onnxruntime_c_api.h"
@@ -1,4 +1,5 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Licensed under the MIT License.

#pragma once
@@ -1,4 +1,5 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Licensed under the MIT License.

#include <iostream>
@@ -1,4 +1,5 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Licensed under the MIT License.

#pragma once
54 changes: 36 additions & 18 deletions onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
@@ -958,6 +958,7 @@ Status BindContextInput(Ort::KernelContext& ctx,
switch (tensor_type) {
CASE_GET_INPUT_TENSOR(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, float)
CASE_GET_INPUT_TENSOR(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16, uint16_t)
CASE_GET_INPUT_TENSOR(ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16, uint16_t)
CASE_GET_INPUT_TENSOR(ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL, bool)
CASE_GET_INPUT_TENSOR(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8, int8_t)
CASE_GET_INPUT_TENSOR(ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8, uint8_t)
@@ -1050,6 +1051,7 @@ Status BindContextOutput(Ort::KernelContext& ctx,
switch (output_type) {
CASE_GET_OUTPUT_TENSOR(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, float)
CASE_GET_OUTPUT_TENSOR(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16, uint16_t)
CASE_GET_OUTPUT_TENSOR(ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16, uint16_t)
CASE_GET_OUTPUT_TENSOR(ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL, bool)
CASE_GET_OUTPUT_TENSOR(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8, int8_t)
CASE_GET_OUTPUT_TENSOR(ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8, uint8_t)
@@ -1119,6 +1121,7 @@ Status BindKernelOutput(Ort::KernelContext& ctx,
switch (output_type) {
CASE_COPY_TENSOR(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, float)
CASE_COPY_TENSOR(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16, uint16_t)
CASE_COPY_TENSOR(ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16, uint16_t)
CASE_COPY_TENSOR(ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL, bool)
CASE_COPY_TENSOR(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8, int8_t)
CASE_COPY_TENSOR(ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8, uint8_t)
@@ -1336,6 +1339,7 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
min_subgraph_size_ = info.min_subgraph_size;
max_workspace_size_ = info.max_workspace_size;
fp16_enable_ = info.fp16_enable;
bf16_enable_ = info.bf16_enable;
int8_enable_ = info.int8_enable;
if (int8_enable_) {
int8_calibration_cache_name_ = info.int8_calibration_table_name;
@@ -1382,7 +1386,7 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
}
force_sequential_engine_build_ = info.force_sequential_engine_build;
context_memory_sharing_enable_ = info.context_memory_sharing_enable;
if (fp16_enable_) {
if (fp16_enable_ || bf16_enable_) {
layer_norm_fp32_fallback_ = info.layer_norm_fp32_fallback;
}
build_heuristics_enable_ = info.build_heuristics_enable;
@@ -1419,6 +1423,11 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
fp16_enable_ = (std::stoi(fp16_enable_env) == 0 ? false : true);
}

const std::string bf16_enable_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kBF16Enable);
if (!bf16_enable_env.empty()) {
bf16_enable_ = (std::stoi(bf16_enable_env) == 0 ? false : true);
}

const std::string int8_enable_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kINT8Enable);
if (!int8_enable_env.empty()) {
int8_enable_ = (std::stoi(int8_enable_env) == 0 ? false : true);
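The same switch is also exposed through the environment, mirroring ORT_TENSORRT_FP16_ENABLE. A minimal sketch of flipping it from code (setenv is POSIX; on Windows _putenv_s would be the rough equivalent):

// Sketch only: the constructor above reads ORT_TENSORRT_BF16_ENABLE once,
// so the variable must be set before the TensorRT EP is created.
#include <cstdlib>

void EnableTrtBf16FromEnv() {
  setenv("ORT_TENSORRT_BF16_ENABLE", "1", /*overwrite=*/1);
}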
@@ -1760,6 +1769,7 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
<< ", trt_min_subgraph_size: " << min_subgraph_size_
<< ", trt_max_workspace_size: " << max_workspace_size_
<< ", trt_fp16_enable: " << fp16_enable_
<< ", trt_bf16_enable: " << bf16_enable_
<< ", trt_int8_enable: " << int8_enable_
<< ", trt_int8_calibration_cache_name: " << int8_calibration_cache_name_
<< ", int8_calibration_cache_available: " << int8_calibration_cache_available_
@@ -2299,7 +2309,7 @@ SubGraphCollection_t TensorrtExecutionProvider::GetSupportedList(SubGraphCollect
auto trt_builder = GetBuilder(trt_logger);
auto network_flags = 0;
#if NV_TENSORRT_MAJOR > 8
network_flags |= fp16_enable_ || int8_enable_ ? 0 : 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kSTRONGLY_TYPED);
network_flags |= (fp16_enable_ || int8_enable_ || bf16_enable_) ? 0 : 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kSTRONGLY_TYPED);
#else
network_flags |= 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
#endif
@@ -2912,7 +2922,7 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView
auto trt_builder = GetBuilder(trt_logger);
auto network_flags = 0;
#if NV_TENSORRT_MAJOR > 8
network_flags |= fp16_enable_ || int8_enable_ ? 0 : 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kSTRONGLY_TYPED);
network_flags |= (fp16_enable_ || int8_enable_ || bf16_enable_) ? 0 : 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kSTRONGLY_TYPED);
#else
network_flags |= 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
#endif
@@ -2925,7 +2935,7 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView
}

// Force Pow + Reduce ops in layer norm to run in FP32 to avoid overflow
if (fp16_enable_ && layer_norm_fp32_fallback_) {
if ((fp16_enable_ || bf16_enable_) && layer_norm_fp32_fallback_) {
for (auto idx = 1; idx < trt_network->getNbLayers() - 1; ++idx) {
auto layer = trt_network->getLayer(idx);
auto next_layer = trt_network->getLayer(idx + 1);
@@ -3074,7 +3084,7 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView
}

// Check platform availability for low precision
if (fp16_enable_) {
if (fp16_enable_ || bf16_enable_) {
#if defined(_MSC_VER)
#pragma warning(push)
#pragma warning(disable : 4996)
@@ -3084,7 +3094,8 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView
#pragma warning(pop)
#endif
fp16_enable_ = false;
LOGS_DEFAULT(WARNING) << "[TensorRT EP] ORT_TENSORRT_FP16_ENABLE is set, but platform doesn't support fast native fp16";
bf16_enable_ = false;
LOGS_DEFAULT(WARNING) << "[TensorRT EP] ORT_TENSORRT_FP16_ENABLE or ORT_TENSORRT_BF16_ENABLE is set, but platform doesn't support fast native fp16/bf16";
}
}

@@ -3113,15 +3124,17 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView

// Set precision flags
std::string trt_node_name_with_precision = fused_node.Name();
if (fp16_enable_ && int8_enable_) {
trt_config->setFlags(1U << static_cast<uint32_t>(nvinfer1::BuilderFlag::kFP16) | 1U << static_cast<uint32_t>(nvinfer1::BuilderFlag::kINT8));
trt_node_name_with_precision += "_fp16_int8";
LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] FP16 and INT8 mode is enabled";
} else if (fp16_enable_) {
if (fp16_enable_) {
trt_config->setFlag(nvinfer1::BuilderFlag::kFP16);
trt_node_name_with_precision += "_fp16";
LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] FP16 mode is enabled";
} else if (int8_enable_) {
}
if (bf16_enable_) {
trt_config->setFlag(nvinfer1::BuilderFlag::kBF16);
trt_node_name_with_precision += "_bf16";
LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] BF16 mode is enabled";
}
if (int8_enable_) {
trt_config->setFlag(nvinfer1::BuilderFlag::kINT8);
trt_node_name_with_precision += "_int8";
LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] INT8 mode is enabled";
@@ -3541,7 +3554,7 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView
*p = {context->allocate_func, context->release_func, context->allocator_handle, context->node_name, builder_.get(),
&parsers_[context->node_name], &engines_[context->node_name], &contexts_[context->node_name],
&networks_[context->node_name], input_info_[context->node_name], output_info_[context->node_name],
input_shape_ranges_[context->node_name], &tensorrt_mu_, fp16_enable_, int8_enable_, int8_calibration_cache_available_,
input_shape_ranges_[context->node_name], &tensorrt_mu_, fp16_enable_, bf16_enable_, int8_enable_, int8_calibration_cache_available_,
dla_enable_, dla_core_, trt_node_name_with_precision,
engine_cache_enable_, cache_path_, runtime_.get(), profiles_[context->node_name],
context_memory_sharing_enable_, &max_ctx_mem_size_, &context_memory_, dynamic_range_map, engine_decryption_enable_,
@@ -3743,12 +3756,17 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView
}

// Set precision
if (trt_state->fp16_enable && trt_state->int8_enable) {
trt_config->setFlags(1U << static_cast<uint32_t>(nvinfer1::BuilderFlag::kFP16) | 1U << static_cast<uint32_t>(nvinfer1::BuilderFlag::kINT8));
} else if (trt_state->fp16_enable) {
trt_config->setFlag(nvinfer1::BuilderFlag::kFP16);
} else if (trt_state->int8_enable) {
if (trt_state->int8_enable) {
trt_config->setFlag(nvinfer1::BuilderFlag::kINT8);
LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] INT8 mode is enabled";
}
if (trt_state->fp16_enable) {
trt_config->setFlag(nvinfer1::BuilderFlag::kFP16);
LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] FP16 mode is enabled";
}
if (trt_state->bf16_enable) {
trt_config->setFlag(nvinfer1::BuilderFlag::kBF16);
LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] BF16 mode is enabled";
}

// Set DLA (DLA can only run with FP16 or INT8)

onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h
@@ -23,6 +23,7 @@
static const std::string kMinSubgraphSize = "ORT_TENSORRT_MIN_SUBGRAPH_SIZE";
static const std::string kMaxWorkspaceSize = "ORT_TENSORRT_MAX_WORKSPACE_SIZE";
static const std::string kFP16Enable = "ORT_TENSORRT_FP16_ENABLE";
static const std::string kBF16Enable = "ORT_TENSORRT_BF16_ENABLE";

Check warning on line 26 in onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h (GitHub Actions / Optional Lint C++): [cpplint] reported by reviewdog 🐶 For a static/global string constant, use a C style string instead: "static const char kBF16Enable[]". [runtime/string] [4]
static const std::string kINT8Enable = "ORT_TENSORRT_INT8_ENABLE";
static const std::string kINT8CalibrationTableName = "ORT_TENSORRT_INT8_CALIBRATION_TABLE_NAME";
static const std::string kINT8UseNativeTensorrtCalibrationTable = "ORT_TENSORRT_INT8_USE_NATIVE_CALIBRATION_TABLE";
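The cpplint annotation above asks for a plain C string rather than a static std::string. A minimal sketch of that form (note the neighbouring constants in this header currently use static const std::string, so adopting it for the new entry alone would be a style deviation):

// Sketch only: the form cpplint suggests, avoiding a dynamically initialized global.
static const char kBF16Enable[] = "ORT_TENSORRT_BF16_ENABLE";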
@@ -172,6 +173,7 @@
std::unordered_map<std::string, std::unordered_map<size_t, std::vector<std::vector<int64_t>>>> input_shape_ranges;
std::mutex* tensorrt_mu_ptr = nullptr;
bool fp16_enable = false;
bool bf16_enable = false;
bool int8_enable = false;
bool int8_calibration_cache_available = false;
bool dla_enable = false;
@@ -297,6 +299,7 @@
size_t min_subgraph_size_ = 1;
size_t max_workspace_size_ = 0;
bool fp16_enable_ = false;
bool bf16_enable_ = false;
bool int8_enable_ = false;
bool dla_enable_ = false;
int dla_core_ = 0;