update trt workspace size param #44469

Merged · 8 commits · Aug 5, 2022
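For context, a minimal sketch (not part of this PR) of why the parameter is widened from int to int64_t: the workspace size is a byte count, and values of 2 GiB or more cannot be represented in a 32-bit int, so 1 << 31 already overflows.

// Minimal sketch: workspace sizes of 2 GiB and above need a 64-bit type.
#include <cstdint>
#include <iostream>

int main() {
  int64_t one_gib = int64_t{1} << 30;   // 1073741824, still fits in a 32-bit int
  int64_t two_gib = int64_t{1} << 31;   // 2147483648, overflows a 32-bit int
  int64_t four_gib = int64_t{1} << 32;  // 4294967296
  std::cout << one_gib << ' ' << two_gib << ' ' << four_gib << '\n';
  return 0;
}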
2 changes: 1 addition & 1 deletion paddle/fluid/inference/analysis/argument.h
@@ -216,7 +216,7 @@ struct Argument {
   DECL_ARGUMENT_FIELD(tensorrt_use_dla, TensorRtUseDLA, bool);
   DECL_ARGUMENT_FIELD(tensorrt_dla_core, TensorRtDLACore, int);
   DECL_ARGUMENT_FIELD(tensorrt_max_batch_size, TensorRtMaxBatchSize, int);
-  DECL_ARGUMENT_FIELD(tensorrt_workspace_size, TensorRtWorkspaceSize, int);
+  DECL_ARGUMENT_FIELD(tensorrt_workspace_size, TensorRtWorkspaceSize, int64_t);
   DECL_ARGUMENT_FIELD(tensorrt_min_subgraph_size, TensorRtMinSubgraphSize, int);
   DECL_ARGUMENT_FIELD(tensorrt_disabled_ops,
                       TensorRtDisabledOPs,
3 changes: 2 additions & 1 deletion paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -133,7 +133,8 @@ void IRPassManager::CreatePasses(Argument *argument,
         argument->bfloat16_enabled_op_types()));
 #endif
   } else if (pass_name == "tensorrt_subgraph_pass") {
-    pass->Set("workspace_size", new int(argument->tensorrt_workspace_size()));
+    pass->Set("workspace_size",
+              new int64_t(argument->tensorrt_workspace_size()));
     pass->Set("max_batch_size", new int(argument->tensorrt_max_batch_size()));
     pass->Set("min_subgraph_size",
               new int(argument->tensorrt_min_subgraph_size()));
@@ -1,3 +1,4 @@
+
 // Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
@@ -378,7 +379,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   op_desc->SetBlockAttr("sub_block", new_block);
   op_desc->SetAttr("subgraph", block_desc.Proto()->SerializeAsString());
   op_desc->SetAttr("max_batch_size", max_batch_size);
-  op_desc->SetAttr("workspace_size", Get<int>("workspace_size"));
+  op_desc->SetAttr("workspace_size", Get<int64_t>("workspace_size"));
   op_desc->SetAttr("gpu_id", Get<int>("gpu_device_id"));
   op_desc->SetAttr("output_name_mapping", output_mapping);
   op_desc->SetAttr("origin_output_dims", renamed_output_dims);
@@ -499,7 +500,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   inference::Singleton<inference::tensorrt::TRTEngineManager>::Global()
       .Create(engine_key + std::to_string(predictor_id),
               max_batch_size,
-              Get<int>("workspace_size"),
+              Get<int64_t>("workspace_size"),
               precision_mode,
               calibrator.get(),
               Get<int>("gpu_device_id"),
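Note on why the Set and Get sides change in lockstep: pass->Set stores a typed attribute, and (as I understand Paddle's pass attribute registry) Get<T> enforces the stored type at runtime, so once ir_pass_manager.cc sets workspace_size as int64_t, a stale Get<int>("workspace_size") in the subgraph pass would fail; the reads above are updated in the same PR for that reason.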
2 changes: 1 addition & 1 deletion paddle/fluid/inference/api/analysis_config.cc
@@ -513,7 +513,7 @@ MkldnnQuantizerConfig *AnalysisConfig::mkldnn_quantizer_config() const {
 }

 void AnalysisConfig::EnableTensorRtEngine(
-    int workspace_size,
+    int64_t workspace_size,
     int max_batch_size,
     int min_subgraph_size,
     AnalysisConfig::Precision precision_mode,
4 changes: 2 additions & 2 deletions paddle/fluid/inference/api/paddle_analysis_config.h
@@ -523,7 +523,7 @@ struct PD_INFER_DECL AnalysisConfig {
   /// quantization).
   ///
   ///
-  void EnableTensorRtEngine(int workspace_size = 1 << 20,
+  void EnableTensorRtEngine(int64_t workspace_size = 1 << 30,
                             int max_batch_size = 1,
                             int min_subgraph_size = 3,
                             Precision precision = Precision::kFloat32,

Review comment on the changed default:
  Reviewer: The documentation says 20 (1 << 20).
  Contributor (author): The documentation has been updated.

@@ -967,7 +967,7 @@ struct PD_INFER_DECL AnalysisConfig {
   bool use_tensorrt_{false};
   // For workspace_size, refer it from here:
   // https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#troubleshooting
-  int tensorrt_workspace_size_{1 << 30};
+  int64_t tensorrt_workspace_size_{1 << 30};
   // While TensorRT allows an engine optimized for a given max batch size
   // to run at any smaller size, the performance for those smaller
   // sizes may not be as well-optimized. Therefore, Max batch is best
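For illustration, a hedged sketch of how a caller might exercise the widened parameter; the include path and model directory are assumptions, not taken from this PR:

#include "paddle_inference_api.h"  // header name is an assumption; adjust to your install

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("model_dir");  // hypothetical model directory
  config.EnableUseGpu(/*memory_pool_init_size_mb=*/500, /*device_id=*/0);
  // With int64_t, a 2 GiB workspace no longer overflows the parameter:
  config.EnableTensorRtEngine(/*workspace_size=*/int64_t{1} << 31,
                              /*max_batch_size=*/1,
                              /*min_subgraph_size=*/3);
  auto predictor = paddle::CreatePaddlePredictor(config);
  return 0;
}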
2 changes: 1 addition & 1 deletion paddle/fluid/inference/capi/paddle_c_api.h
@@ -214,7 +214,7 @@ PADDLE_CAPI_EXPORT extern bool PD_SpecifyInputName(

 PADDLE_CAPI_EXPORT extern void PD_EnableTensorRtEngine(
     PD_AnalysisConfig* config,
-    int workspace_size,
+    int64_t workspace_size,
     int max_batch_size,
     int min_subgraph_size,
     Precision precision,
2 changes: 1 addition & 1 deletion paddle/fluid/inference/capi/pd_config.cc
@@ -243,7 +243,7 @@ bool PD_SpecifyInputName(const PD_AnalysisConfig* config) {
 }

 void PD_EnableTensorRtEngine(PD_AnalysisConfig* config,
-                             int workspace_size,
+                             int64_t workspace_size,
                              int max_batch_size,
                              int min_subgraph_size,
                              Precision precision,
2 changes: 1 addition & 1 deletion paddle/fluid/inference/capi_exp/pd_config.cc
@@ -219,7 +219,7 @@ PD_Bool PD_ConfigIrOptim(__pd_keep PD_Config* pd_config) {
 }

 void PD_ConfigEnableTensorRtEngine(__pd_keep PD_Config* pd_config,
-                                   int32_t workspace_size,
+                                   int64_t workspace_size,
                                    int32_t max_batch_size,
                                    int32_t min_subgraph_size,
                                    PD_PrecisionType precision,
2 changes: 1 addition & 1 deletion paddle/fluid/inference/capi_exp/pd_config.h
@@ -329,7 +329,7 @@ PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigIrOptim(
 ///
 PADDLE_CAPI_EXPORT extern void PD_ConfigEnableTensorRtEngine(
     __pd_keep PD_Config* pd_config,
-    int32_t workspace_size,
+    int64_t workspace_size,
     int32_t max_batch_size,
     int32_t min_subgraph_size,
     PD_PrecisionType precision,
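A hedged usage sketch for the updated C API entry point; the umbrella header name and the trailing use_static/use_calib_mode parameters are assumptions based on the existing signature, whose tail is truncated in this view:

#include "pd_inference_api.h"  // assumed umbrella header for the capi_exp API

void ConfigureTrt(PD_Config* config) {
  // A 2 GiB workspace is now expressible through the int64_t parameter.
  PD_ConfigEnableTensorRtEngine(config,
                                (int64_t)1 << 31,       /* workspace_size */
                                /*max_batch_size=*/1,
                                /*min_subgraph_size=*/3,
                                PD_PRECISION_FLOAT32,
                                /*use_static=*/0,
                                /*use_calib_mode=*/0);
}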
2 changes: 1 addition & 1 deletion paddle/fluid/inference/tensorrt/convert/ut_helper.h
@@ -79,7 +79,7 @@ class TRTConvertValidation {
   TRTConvertValidation(int max_batch_size,
                        const std::unordered_set<std::string>& parameters,
                        framework::Scope& scope,  // NOLINT
-                       int workspace_size = 1 << 10,
+                       int64_t workspace_size = 1 << 30,
                        bool if_add_batch = true)
       : parameters_(parameters),
         scope_(scope),
6 changes: 3 additions & 3 deletions paddle/fluid/inference/tensorrt/engine.h
@@ -206,7 +206,7 @@ class TensorRTEngine {

   TensorRTEngine(
       int max_batch,
-      int max_workspace,
+      int64_t max_workspace,
       AnalysisConfig::Precision precision = AnalysisConfig::Precision::kFloat32,
       TRTInt8Calibrator* calibrator = nullptr,
       int device_id = 0,
@@ -671,7 +671,7 @@ class TensorRTEngine {
   // the runtime batch size
   static int runtime_batch_;
   // the max memory size the engine uses
-  int max_workspace_;
+  int64_t max_workspace_;

   AnalysisConfig::Precision precision_;
   TRTInt8Calibrator* calibrator_;
@@ -766,7 +766,7 @@ class TRTEngineManager {
   TensorRTEngine* Create(
       std::string name,
       int max_batch,
-      int max_workspace,
+      int64_t max_workspace,
       AnalysisConfig::Precision precision = AnalysisConfig::Precision::kFloat32,
       TRTInt8Calibrator* calibrator = nullptr,
       int device_id = 0,
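Where this value ultimately lands is TensorRT's builder configuration; nvinfer1::IBuilderConfig::setMaxWorkspaceSize takes a std::size_t, so a 64-bit value passes through without truncation on 64-bit platforms. A minimal sketch (not from this diff):

#include <cstddef>
#include <cstdint>
#include <NvInfer.h>

// Sketch: hand an int64_t workspace budget to a TensorRT builder config.
void SetWorkspace(nvinfer1::IBuilderConfig* config, int64_t max_workspace) {
  config->setMaxWorkspaceSize(static_cast<std::size_t>(max_workspace));
}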
2 changes: 1 addition & 1 deletion paddle/fluid/operators/tensorrt/tensorrt_engine_op.cc
@@ -34,7 +34,7 @@ class TensorRTEngineOpMaker : public framework::OpProtoAndCheckerMaker {
         "engine_key",
         "The engine_key here is used to distinguish different TRT Engines");
     AddAttr<int>("max_batch_size", "the maximum batch size.");
-    AddAttr<int>("workspace_size", "the workspace size.");
+    AddAttr<int64_t>("workspace_size", "the workspace size.").AsExtra();
     AddAttr<framework::BlockDesc *>("sub_block", "the trt block");
     AddAttr<bool>("enable_int8", "whether swith to int8 mode");
     AddComment("TensorRT engine operator.");
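As I read it, the added .AsExtra() marks workspace_size as an auxiliary attribute rather than part of the operator's core definition, which keeps the int-to-int64_t type change out of operator compatibility checks.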
4 changes: 2 additions & 2 deletions paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
@@ -177,7 +177,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
   std::vector<std::string> runtime_input_names_;
   mutable TensorRTEngine *trt_engine_{nullptr};
   int max_batch_size_;
-  int workspace_size_;
+  int64_t workspace_size_;
   std::unique_ptr<TRTInt8Calibrator> calibrator_;
   bool enable_int8_;
   bool enable_fp16_;
@@ -207,7 +207,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
       : framework::OperatorBase(type, inputs, outputs, attrs) {
     input_names_ = Inputs("Xs");
     max_batch_size_ = Attr<int>("max_batch_size");
-    workspace_size_ = Attr<int>("workspace_size");
+    workspace_size_ = Attr<int64_t>("workspace_size");
     device_id_ = Attr<int>("gpu_id");
     enable_int8_ = Attr<bool>("enable_int8");
     enable_fp16_ = Attr<bool>("enable_fp16");
4 changes: 2 additions & 2 deletions paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc
@@ -107,7 +107,7 @@ void DynamicShapeTest(bool allow_build_at_runtime) {

   engine_op_desc.SetBlockAttr("sub_block", &block_desc);
   engine_op_desc.SetAttr("max_batch_size", static_cast<int>(2));
-  engine_op_desc.SetAttr("workspace_size", static_cast<int>(1 << 20));
+  engine_op_desc.SetAttr("workspace_size", static_cast<int64_t>(1 << 20));
   engine_op_desc.SetAttr("parameters", std::vector<std::string>({}));
   engine_op_desc.SetAttr("engine_key", std::string("a_engine"));
   engine_op_desc.SetAttr("calibration_engine_key",
@@ -259,7 +259,7 @@ void Execute(int batch_size, int input_dim, int output_dim, int nlayers = 1) {

   engine_op_desc.SetBlockAttr("sub_block", &block_desc);
   engine_op_desc.SetAttr("max_batch_size", static_cast<int>(batch_size));
-  engine_op_desc.SetAttr("workspace_size", static_cast<int>(1 << 20));
+  engine_op_desc.SetAttr("workspace_size", static_cast<int64_t>(1 << 20));
   engine_op_desc.SetAttr("parameters",
                          std::vector<std::string>({"y0", "y1", "y2", "y3"}));
   engine_op_desc.SetAttr("engine_key", std::string("b_engine"));
2 changes: 1 addition & 1 deletion paddle/fluid/pybind/inference_api.cc
@@ -687,7 +687,7 @@ void BindAnalysisConfig(py::module *m) {
       .def("specify_input_name", &AnalysisConfig::specify_input_name)
       .def("enable_tensorrt_engine",
            &AnalysisConfig::EnableTensorRtEngine,
-           py::arg("workspace_size") = 1 << 20,
+           py::arg("workspace_size") = 1 << 30,
            py::arg("max_batch_size") = 1,
            py::arg("min_subgraph_size") = 3,
            py::arg("precision_mode") = AnalysisConfig::Precision::kFloat32,
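On the Python side, this means enable_tensorrt_engine now defaults to 1 << 30 (1 GiB) like the C++ API, and workspace sizes of 1 << 31 and above, which previously overflowed the 32-bit parameter, bind cleanly to the int64_t argument.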