update trt workspace size param #44469

Merged · 8 commits · Aug 5, 2022
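For context, a minimal sketch (not part of this PR) of why the parameter is widened from int to int64_t: the workspace size is a byte count, and values of 2 GiB or more cannot be represented in a 32-bit int, so 1 << 31 already overflows.

// Minimal sketch: workspace sizes of 2 GiB and above need a 64-bit type.
#include <cstdint>
#include <iostream>

int main() {
  int64_t one_gib = int64_t{1} << 30;   // 1073741824, still fits in a 32-bit int
  int64_t two_gib = int64_t{1} << 31;   // 2147483648, overflows a 32-bit int
  int64_t four_gib = int64_t{1} << 32;  // 4294967296
  std::cout << one_gib << ' ' << two_gib << ' ' << four_gib << '\n';
  return 0;
}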
2 changes: 1 addition & 1 deletion paddle/fluid/inference/analysis/argument.h
@@ -216,7 +216,7 @@ struct Argument {
   DECL_ARGUMENT_FIELD(tensorrt_use_dla, TensorRtUseDLA, bool);
   DECL_ARGUMENT_FIELD(tensorrt_dla_core, TensorRtDLACore, int);
   DECL_ARGUMENT_FIELD(tensorrt_max_batch_size, TensorRtMaxBatchSize, int);
-  DECL_ARGUMENT_FIELD(tensorrt_workspace_size, TensorRtWorkspaceSize, int);
+  DECL_ARGUMENT_FIELD(tensorrt_workspace_size, TensorRtWorkspaceSize, int64_t);
   DECL_ARGUMENT_FIELD(tensorrt_min_subgraph_size, TensorRtMinSubgraphSize, int);
   DECL_ARGUMENT_FIELD(tensorrt_disabled_ops,
                       TensorRtDisabledOPs,
3 changes: 2 additions & 1 deletion paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -133,7 +133,8 @@ void IRPassManager::CreatePasses(Argument *argument,
         argument->bfloat16_enabled_op_types()));
 #endif
   } else if (pass_name == "tensorrt_subgraph_pass") {
-    pass->Set("workspace_size", new int(argument->tensorrt_workspace_size()));
+    pass->Set("workspace_size",
+              new int64_t(argument->tensorrt_workspace_size()));
     pass->Set("max_batch_size", new int(argument->tensorrt_max_batch_size()));
     pass->Set("min_subgraph_size",
               new int(argument->tensorrt_min_subgraph_size()));
@@ -1,3 +1,4 @@
+
 // Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
@@ -378,7 +379,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   op_desc->SetBlockAttr("sub_block", new_block);
   op_desc->SetAttr("subgraph", block_desc.Proto()->SerializeAsString());
   op_desc->SetAttr("max_batch_size", max_batch_size);
-  op_desc->SetAttr("workspace_size", Get<int>("workspace_size"));
+  op_desc->SetAttr("workspace_size", Get<int64_t>("workspace_size"));
   op_desc->SetAttr("gpu_id", Get<int>("gpu_device_id"));
   op_desc->SetAttr("output_name_mapping", output_mapping);
   op_desc->SetAttr("origin_output_dims", renamed_output_dims);
@@ -499,7 +500,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   inference::Singleton<inference::tensorrt::TRTEngineManager>::Global()
       .Create(engine_key + std::to_string(predictor_id),
               max_batch_size,
-              Get<int>("workspace_size"),
+              Get<int64_t>("workspace_size"),
               precision_mode,
               calibrator.get(),
               Get<int>("gpu_device_id"),
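Note on why the Set and Get sides change in lockstep: pass->Set stores a typed attribute, and (as I understand Paddle's pass attribute registry) Get<T> enforces the stored type at runtime, so once ir_pass_manager.cc sets workspace_size as int64_t, a stale Get<int>("workspace_size") in the subgraph pass would fail; the reads above are updated in the same PR for that reason.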
2 changes: 1 addition & 1 deletion paddle/fluid/inference/api/analysis_config.cc
@@ -513,7 +513,7 @@ MkldnnQuantizerConfig *AnalysisConfig::mkldnn_quantizer_config() const {
 }

 void AnalysisConfig::EnableTensorRtEngine(
-    int workspace_size,
+    int64_t workspace_size,
     int max_batch_size,
     int min_subgraph_size,
     AnalysisConfig::Precision precision_mode,
4 changes: 2 additions & 2 deletions paddle/fluid/inference/api/paddle_analysis_config.h
@@ -523,7 +523,7 @@ struct PD_INFER_DECL AnalysisConfig {
   /// quantization).
   ///
   ///
-  void EnableTensorRtEngine(int workspace_size = 1 << 20,
+  void EnableTensorRtEngine(int64_t workspace_size = 1 << 30,
                             int max_batch_size = 1,
                             int min_subgraph_size = 3,
                             Precision precision = Precision::kFloat32,

Review comment on the changed default:
  Reviewer: The documentation says 20 (1 << 20).
  Contributor (author): The documentation has been updated.

@@ -967,7 +967,7 @@ struct PD_INFER_DECL AnalysisConfig {
   bool use_tensorrt_{false};
   // For workspace_size, refer it from here:
   // https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#troubleshooting
-  int tensorrt_workspace_size_{1 << 30};
+  int64_t tensorrt_workspace_size_{1 << 30};
   // While TensorRT allows an engine optimized for a given max batch size
   // to run at any smaller size, the performance for those smaller
   // sizes may not be as well-optimized. Therefore, Max batch is best
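For illustration, a hedged sketch of how a caller might exercise the widened parameter; the include path and model directory are assumptions, not taken from this PR:

#include "paddle_inference_api.h"  // header name is an assumption; adjust to your install

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("model_dir");  // hypothetical model directory
  config.EnableUseGpu(/*memory_pool_init_size_mb=*/500, /*device_id=*/0);
  // With int64_t, a 2 GiB workspace no longer overflows the parameter:
  config.EnableTensorRtEngine(/*workspace_size=*/int64_t{1} << 31,
                              /*max_batch_size=*/1,
                              /*min_subgraph_size=*/3);
  auto predictor = paddle::CreatePaddlePredictor(config);
  return 0;
}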
2 changes: 1 addition & 1 deletion paddle/fluid/inference/capi/paddle_c_api.h
@@ -214,7 +214,7 @@ PADDLE_CAPI_EXPORT extern bool PD_SpecifyInputName(

 PADDLE_CAPI_EXPORT extern void PD_EnableTensorRtEngine(
     PD_AnalysisConfig* config,
-    int workspace_size,
+    int64_t workspace_size,
     int max_batch_size,
     int min_subgraph_size,
     Precision precision,
2 changes: 1 addition & 1 deletion paddle/fluid/inference/capi/pd_config.cc
@@ -243,7 +243,7 @@ bool PD_SpecifyInputName(const PD_AnalysisConfig* config) {
 }

 void PD_EnableTensorRtEngine(PD_AnalysisConfig* config,
-                             int workspace_size,
+                             int64_t workspace_size,
                              int max_batch_size,
                              int min_subgraph_size,
                              Precision precision,
2 changes: 1 addition & 1 deletion paddle/fluid/inference/capi_exp/pd_config.cc
@@ -219,7 +219,7 @@ PD_Bool PD_ConfigIrOptim(__pd_keep PD_Config* pd_config) {
 }

 void PD_ConfigEnableTensorRtEngine(__pd_keep PD_Config* pd_config,
-                                   int32_t workspace_size,
+                                   int64_t workspace_size,
                                    int32_t max_batch_size,
                                    int32_t min_subgraph_size,
                                    PD_PrecisionType precision,
2 changes: 1 addition & 1 deletion paddle/fluid/inference/capi_exp/pd_config.h
@@ -329,7 +329,7 @@ PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigIrOptim(
 ///
 PADDLE_CAPI_EXPORT extern void PD_ConfigEnableTensorRtEngine(
     __pd_keep PD_Config* pd_config,
-    int32_t workspace_size,
+    int64_t workspace_size,
     int32_t max_batch_size,
     int32_t min_subgraph_size,
     PD_PrecisionType precision,
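A hedged usage sketch for the updated C API entry point; the umbrella header name and the trailing use_static/use_calib_mode parameters are assumptions based on the existing signature, whose tail is truncated in this view:

#include "pd_inference_api.h"  // assumed umbrella header for the capi_exp API

void ConfigureTrt(PD_Config* config) {
  // A 2 GiB workspace is now expressible through the int64_t parameter.
  PD_ConfigEnableTensorRtEngine(config,
                                (int64_t)1 << 31,       /* workspace_size */
                                /*max_batch_size=*/1,
                                /*min_subgraph_size=*/3,
                                PD_PRECISION_FLOAT32,
                                /*use_static=*/0,
                                /*use_calib_mode=*/0);
}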
2 changes: 1 addition & 1 deletion paddle/fluid/inference/tensorrt/convert/ut_helper.h
@@ -79,7 +79,7 @@ class TRTConvertValidation {
   TRTConvertValidation(int max_batch_size,
                        const std::unordered_set<std::string>& parameters,
                        framework::Scope& scope,  // NOLINT
-                       int workspace_size = 1 << 10,
+                       int64_t workspace_size = 1 << 30,
                        bool if_add_batch = true)
       : parameters_(parameters),
         scope_(scope),
6 changes: 3 additions & 3 deletions paddle/fluid/inference/tensorrt/engine.h
@@ -206,7 +206,7 @@ class TensorRTEngine {

   TensorRTEngine(
       int max_batch,
-      int max_workspace,
+      int64_t max_workspace,
       AnalysisConfig::Precision precision = AnalysisConfig::Precision::kFloat32,
       TRTInt8Calibrator* calibrator = nullptr,
       int device_id = 0,
@@ -671,7 +671,7 @@ class TensorRTEngine {
   // the runtime batch size
   static int runtime_batch_;
   // the max memory size the engine uses
-  int max_workspace_;
+  int64_t max_workspace_;

   AnalysisConfig::Precision precision_;
   TRTInt8Calibrator* calibrator_;
@@ -766,7 +766,7 @@ class TRTEngineManager {
   TensorRTEngine* Create(
       std::string name,
       int max_batch,
-      int max_workspace,
+      int64_t max_workspace,
       AnalysisConfig::Precision precision = AnalysisConfig::Precision::kFloat32,
       TRTInt8Calibrator* calibrator = nullptr,
       int device_id = 0,
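Where this value ultimately lands is TensorRT's builder configuration; nvinfer1::IBuilderConfig::setMaxWorkspaceSize takes a std::size_t, so a 64-bit value passes through without truncation on 64-bit platforms. A minimal sketch (not from this diff):

#include <cstddef>
#include <cstdint>
#include <NvInfer.h>

// Sketch: hand an int64_t workspace budget to a TensorRT builder config.
void SetWorkspace(nvinfer1::IBuilderConfig* config, int64_t max_workspace) {
  config->setMaxWorkspaceSize(static_cast<std::size_t>(max_workspace));
}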
2 changes: 1 addition & 1 deletion paddle/fluid/operators/tensorrt/tensorrt_engine_op.cc
@@ -34,7 +34,7 @@ class TensorRTEngineOpMaker : public framework::OpProtoAndCheckerMaker {
         "engine_key",
         "The engine_key here is used to distinguish different TRT Engines");
     AddAttr<int>("max_batch_size", "the maximum batch size.");
-    AddAttr<int>("workspace_size", "the workspace size.");
+    AddAttr<int64_t>("workspace_size", "the workspace size.").AsExtra();
     AddAttr<framework::BlockDesc *>("sub_block", "the trt block");
     AddAttr<bool>("enable_int8", "whether swith to int8 mode");
     AddComment("TensorRT engine operator.");
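As I read it, the added .AsExtra() marks workspace_size as an auxiliary attribute rather than part of the operator's core definition, which keeps the int-to-int64_t type change out of operator compatibility checks.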
4 changes: 2 additions & 2 deletions paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
@@ -177,7 +177,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
   std::vector<std::string> runtime_input_names_;
   mutable TensorRTEngine *trt_engine_{nullptr};
   int max_batch_size_;
-  int workspace_size_;
+  int64_t workspace_size_;
   std::unique_ptr<TRTInt8Calibrator> calibrator_;
   bool enable_int8_;
   bool enable_fp16_;
@@ -207,7 +207,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
       : framework::OperatorBase(type, inputs, outputs, attrs) {
     input_names_ = Inputs("Xs");
     max_batch_size_ = Attr<int>("max_batch_size");
-    workspace_size_ = Attr<int>("workspace_size");
+    workspace_size_ = Attr<int64_t>("workspace_size");
     device_id_ = Attr<int>("gpu_id");
     enable_int8_ = Attr<bool>("enable_int8");
     enable_fp16_ = Attr<bool>("enable_fp16");
4 changes: 2 additions & 2 deletions paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc
@@ -107,7 +107,7 @@ void DynamicShapeTest(bool allow_build_at_runtime) {

   engine_op_desc.SetBlockAttr("sub_block", &block_desc);
   engine_op_desc.SetAttr("max_batch_size", static_cast<int>(2));
-  engine_op_desc.SetAttr("workspace_size", static_cast<int>(1 << 20));
+  engine_op_desc.SetAttr("workspace_size", static_cast<int64_t>(1 << 20));
   engine_op_desc.SetAttr("parameters", std::vector<std::string>({}));
   engine_op_desc.SetAttr("engine_key", std::string("a_engine"));
   engine_op_desc.SetAttr("calibration_engine_key",
@@ -259,7 +259,7 @@ void Execute(int batch_size, int input_dim, int output_dim, int nlayers = 1) {

   engine_op_desc.SetBlockAttr("sub_block", &block_desc);
   engine_op_desc.SetAttr("max_batch_size", static_cast<int>(batch_size));
-  engine_op_desc.SetAttr("workspace_size", static_cast<int>(1 << 20));
+  engine_op_desc.SetAttr("workspace_size", static_cast<int64_t>(1 << 20));
   engine_op_desc.SetAttr("parameters",
                          std::vector<std::string>({"y0", "y1", "y2", "y3"}));
   engine_op_desc.SetAttr("engine_key", std::string("b_engine"));
2 changes: 1 addition & 1 deletion paddle/fluid/pybind/inference_api.cc
@@ -687,7 +687,7 @@ void BindAnalysisConfig(py::module *m) {
       .def("specify_input_name", &AnalysisConfig::specify_input_name)
       .def("enable_tensorrt_engine",
            &AnalysisConfig::EnableTensorRtEngine,
-           py::arg("workspace_size") = 1 << 20,
+           py::arg("workspace_size") = 1 << 30,
            py::arg("max_batch_size") = 1,
            py::arg("min_subgraph_size") = 3,
            py::arg("precision_mode") = AnalysisConfig::Precision::kFloat32,
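On the Python side, this means enable_tensorrt_engine now defaults to 1 << 30 (1 GiB) like the C++ API, and workspace sizes of 1 << 31 and above, which previously overflowed the 32-bit parameter, bind cleanly to the int64_t argument.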