
Commit

Merge pull request PaddlePaddle#48 from jiweibo/2.0_api_for_trt
update paddle-trt demo with 2.0 api.
jiweibo authored Dec 3, 2020
2 parents c2d7812 + 50903a3 commit a0e45d3
Showing 5 changed files with 98 additions and 90 deletions.
12 changes: 6 additions & 6 deletions c++/paddle-trt/README.md
@@ -35,7 +35,7 @@ WITH_GPU=ON
USE_TENSORRT=ON

# Set the root directory of the inference library
LIB_DIR=/paddle/fluid_inference_install_dir
LIB_DIR=/paddle/paddle_inference_install_dir

# If WITH_GPU or USE_TENSORRT above is set to ON, set the corresponding CUDA, CUDNN and TensorRT paths. Note that CUDA and CUDNN must point down to the lib64 level, while TensorRT points to its root directory
CUDNN_LIB=/paddle/nvidia-downloads/cudnn_v7.6_cuda10.1/lib64
@@ -77,7 +77,7 @@ WITH_GPU=ON
USE_TENSORRT=ON

# Set the root directory of the inference library
LIB_DIR=/paddle/fluid_inference_install_dir
LIB_DIR=/paddle/paddle_inference_install_dir

# If WITH_GPU or USE_TENSORRT above is set to ON, set the corresponding CUDA, CUDNN and TensorRT paths. Note that CUDA and CUDNN must point down to the lib64 level, while TensorRT points to its root directory
CUDNN_LIB=/paddle/nvidia-downloads/cudnn_v7.6_cuda10.1/lib64
@@ -100,7 +100,7 @@ cd build

#### Load the calibration table and run Int8 inference

Modify `run_impl.sh` again and switch to the demo that runs Int8 inference:
1) Modify `run_impl.sh` again and switch to the demo that runs Int8 inference:

```shell
# Select the demo that runs Int8 inference
@@ -112,7 +112,7 @@ WITH_GPU=ON
USE_TENSORRT=ON

# Set the root directory of the inference library
LIB_DIR=/paddle/fluid_inference_install_dir
LIB_DIR=/paddle/paddle_inference_install_dir

# If WITH_GPU or USE_TENSORRT above is set to ON, set the corresponding CUDA, CUDNN and TensorRT paths. Note that CUDA and CUDNN must point down to the lib64 level, while TensorRT points to its root directory
CUDNN_LIB=/paddle/nvidia-downloads/cudnn_v7.6_cuda10.1/lib64
@@ -177,7 +177,7 @@ WITH_GPU=ON
USE_TENSORRT=ON

# Set the root directory of the inference library
LIB_DIR=/paddle/fluid_inference_install_dir
LIB_DIR=/paddle/paddle_inference_install_dir

# If WITH_GPU or USE_TENSORRT above is set to ON, set the corresponding CUDA, CUDNN and TensorRT paths. Note that CUDA and CUDNN must point down to the lib64 level, while TensorRT points to its root directory
CUDNN_LIB=/paddle/nvidia-downloads/cudnn_v7.6_cuda10.1/lib64
@@ -194,7 +194,7 @@ TENSORRT_ROOT=/paddle/nvidia-downloads/TensorRT-6.0.1.5
# Enter the build directory
cd build
# Run the sample; note that use_calib must be set to false here
./trt_int8_test --model_file=../ResNet50_quant/model --params_file=../ResNet50_quant/params --use_calib=false
./trt_int8_test --model_dir=../ResNet50_quant/ --use_calib=false
```

After the run finishes, the program prints the first 20 values of the model's prediction output to the screen, indicating a successful run.
2 changes: 1 addition & 1 deletion c++/paddle-trt/run_impl.sh
@@ -9,7 +9,7 @@ WITH_MKL=ON
WITH_GPU=ON
USE_TENSORRT=ON

LIB_DIR=/paddle/build/fluid_inference_install_dir
LIB_DIR=/paddle/build/paddle_inference_install_dir
CUDNN_LIB=/paddle/nvidia-downloads/cudnn_v7.6_cuda10.1/lib64
CUDA_LIB=/paddle/nvidia-downloads/cuda-10.1/lib64
TENSORRT_ROOT=/paddle/nvidia-downloads/TensorRT-6.0.1.5
58 changes: 30 additions & 28 deletions c++/paddle-trt/trt_fp32_test.cc
@@ -1,14 +1,17 @@
#include <numeric>
#include <chrono>
#include <iostream>
#include <memory>
#include <chrono>
#include <numeric>

#include <gflags/gflags.h>
#include <glog/logging.h>

#include "paddle/include/paddle_inference_api.h"

using paddle::AnalysisConfig;
using paddle_infer::Config;
using paddle_infer::Predictor;
using paddle_infer::CreatePredictor;
using paddle_infer::PrecisionType;

DEFINE_string(model_file, "", "Path of the inference model file.");
DEFINE_string(params_file, "", "Path of the inference params file.");
@@ -24,50 +27,49 @@ double time_diff(Time t1, Time t2) {
return counter.count() / 1000.0;
}

std::unique_ptr<paddle::PaddlePredictor> CreatePredictor() {
AnalysisConfig config;
std::shared_ptr<Predictor> InitPredictor() {
Config config;
if (FLAGS_model_dir != "") {
config.SetModel(FLAGS_model_dir);
} else {
config.SetModel(FLAGS_model_file,
FLAGS_params_file);
config.SetModel(FLAGS_model_file, FLAGS_params_file);
}
config.EnableUseGpu(500, 0);
// We use ZeroCopy, so we set config.SwitchUseFeedFetchOps(false) here.
config.SwitchUseFeedFetchOps(false);
config.EnableTensorRtEngine(1 << 30, FLAGS_batch_size, 5, AnalysisConfig::Precision::kFloat32, false, false);
return CreatePaddlePredictor(config);
config.EnableTensorRtEngine(1 << 30, FLAGS_batch_size, 5,
PrecisionType::kFloat32, false, false);
return CreatePredictor(config);
}

void run(paddle::PaddlePredictor *predictor,
const std::vector<float>& input,
const std::vector<int>& input_shape,
std::vector<float> *out_data) {
int input_num = std::accumulate(input_shape.begin(), input_shape.end(), 1, std::multiplies<int>());
void run(Predictor *predictor, const std::vector<float> &input,
const std::vector<int> &input_shape, std::vector<float> *out_data) {
int input_num = std::accumulate(input_shape.begin(), input_shape.end(), 1,
std::multiplies<int>());

auto input_names = predictor->GetInputNames();
auto input_t = predictor->GetInputTensor(input_names[0]);
auto input_t = predictor->GetInputHandle(input_names[0]);
input_t->Reshape(input_shape);
input_t->copy_from_cpu(input.data());
input_t->CopyFromCpu(input.data());

CHECK(predictor->ZeroCopyRun());
CHECK(predictor->Run());

auto output_names = predictor->GetOutputNames();
// there is only one output of Resnet50
auto output_t = predictor->GetOutputTensor(output_names[0]);
auto output_t = predictor->GetOutputHandle(output_names[0]);
std::vector<int> output_shape = output_t->shape();
int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1, std::multiplies<int>());
int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
std::multiplies<int>());

out_data->resize(out_num);
output_t->copy_to_cpu(out_data->data());
output_t->CopyToCpu(out_data->data());
}
int main(int argc, char* argv[]) {

int main(int argc, char *argv[]) {
google::ParseCommandLineFlags(&argc, &argv, true);
auto predictor = CreatePredictor();
std::vector<int> input_shape = {FLAGS_batch_size, 3, 224, 224};
// Init input as 1.0 here for example. You can also load preprocessed real pictures to vectors as input.
std::vector<float> input_data(FLAGS_batch_size * 3 * 224 * 224, 1.0);
auto predictor = InitPredictor();
std::vector<int> input_shape = {FLAGS_batch_size, 3, 224, 224};
// Init input as 1.0 here for example. You can also load preprocessed real
// pictures to vectors as input.
std::vector<float> input_data(FLAGS_batch_size * 3 * 224 * 224, 1.0);
std::vector<float> out_data;
run(predictor.get(), input_data, input_shape, &out_data);
// Print first 20 outputs
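Because the diff above interleaves old and new lines, here is a consolidated sketch of what trt_fp32_test.cc looks like after this change, assembled from the new-side lines of the hunks. The tail of main() (printing the first 20 outputs) is not shown in the diff and is reconstructed from the "// Print first 20 outputs" comment, so treat it as illustrative:

```cpp
#include <chrono>
#include <functional>
#include <iostream>
#include <memory>
#include <numeric>
#include <vector>

#include <gflags/gflags.h>
#include <glog/logging.h>

#include "paddle/include/paddle_inference_api.h"

using paddle_infer::Config;
using paddle_infer::CreatePredictor;
using paddle_infer::PrecisionType;
using paddle_infer::Predictor;

DEFINE_string(model_file, "", "Path of the inference model file.");
DEFINE_string(params_file, "", "Path of the inference params file.");
DEFINE_string(model_dir, "", "Directory of the inference model.");
DEFINE_int32(batch_size, 1, "Batch size.");

// Build a 2.0-API predictor; TensorRT executes the subgraph in FP32.
std::shared_ptr<Predictor> InitPredictor() {
  Config config;
  if (FLAGS_model_dir != "") {
    config.SetModel(FLAGS_model_dir);
  } else {
    config.SetModel(FLAGS_model_file, FLAGS_params_file);
  }
  config.EnableUseGpu(500, 0);
  config.EnableTensorRtEngine(1 << 30, FLAGS_batch_size, 5,
                              PrecisionType::kFloat32, false, false);
  return CreatePredictor(config);
}

// Feed one batch, run the predictor, and copy the single output back to CPU.
void run(Predictor *predictor, const std::vector<float> &input,
         const std::vector<int> &input_shape, std::vector<float> *out_data) {
  auto input_names = predictor->GetInputNames();
  auto input_t = predictor->GetInputHandle(input_names[0]);
  input_t->Reshape(input_shape);
  input_t->CopyFromCpu(input.data());

  CHECK(predictor->Run());

  auto output_names = predictor->GetOutputNames();
  // There is only one output for ResNet50.
  auto output_t = predictor->GetOutputHandle(output_names[0]);
  std::vector<int> output_shape = output_t->shape();
  int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
                                std::multiplies<int>());
  out_data->resize(out_num);
  output_t->CopyToCpu(out_data->data());
}

int main(int argc, char *argv[]) {
  google::ParseCommandLineFlags(&argc, &argv, true);
  auto predictor = InitPredictor();
  std::vector<int> input_shape = {FLAGS_batch_size, 3, 224, 224};
  // All-ones input is a placeholder; load real preprocessed images in practice.
  std::vector<float> input_data(FLAGS_batch_size * 3 * 224 * 224, 1.0);
  std::vector<float> out_data;
  run(predictor.get(), input_data, input_shape, &out_data);
  for (int i = 0; i < 20; ++i) {
    std::cout << out_data[i] << " ";
  }
  std::cout << std::endl;
  return 0;
}
```

It is invoked through the --model_dir or --model_file/--params_file flags defined above, as in the README section earlier in this commit.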
54 changes: 28 additions & 26 deletions c++/paddle-trt/trt_gen_calib_table_test.cc
@@ -1,15 +1,18 @@
#include <numeric>
#include <chrono>
#include <iostream>
#include <memory>
#include <chrono>
#include <numeric>
#include <random>

#include <gflags/gflags.h>
#include <glog/logging.h>

#include "paddle/include/paddle_inference_api.h"

using paddle::AnalysisConfig;
using paddle_infer::Config;
using paddle_infer::Predictor;
using paddle_infer::CreatePredictor;
using paddle_infer::PrecisionType;

DEFINE_string(model_file, "", "Path of the inference model file.");
DEFINE_string(params_file, "", "Path of the inference params file.");
@@ -32,47 +35,46 @@ double time_diff(Time t1, Time t2) {
return counter.count() / 1000.0;
}

std::unique_ptr<paddle::PaddlePredictor> CreatePredictor() {
AnalysisConfig config;
std::shared_ptr<Predictor> InitPredictor() {
Config config;
if (FLAGS_model_dir != "") {
config.SetModel(FLAGS_model_dir);
} else {
config.SetModel(FLAGS_model_file,
FLAGS_params_file);
config.SetModel(FLAGS_model_file, FLAGS_params_file);
}
config.EnableUseGpu(500, 0);
// We use ZeroCopy, so we set config.SwitchUseFeedFetchOps(false) here.
config.SwitchUseFeedFetchOps(false);
config.EnableTensorRtEngine(1 << 30, FLAGS_batch_size, 5, AnalysisConfig::Precision::kInt8, false, true /*use_calib*/);
return CreatePaddlePredictor(config);
config.EnableTensorRtEngine(1 << 30, FLAGS_batch_size, 5,
PrecisionType::kInt8, false, true /*use_calib*/);
return CreatePredictor(config);
}

void run(paddle::PaddlePredictor *predictor,
std::vector<float>& input,
const std::vector<int>& input_shape,
std::vector<float> *out_data) {
int input_num = std::accumulate(input_shape.begin(), input_shape.end(), 1, std::multiplies<int>());
void run(Predictor *predictor, std::vector<float> &input,
const std::vector<int> &input_shape, std::vector<float> *out_data) {
int input_num = std::accumulate(input_shape.begin(), input_shape.end(), 1,
std::multiplies<int>());
for (size_t i = 0; i < 500; i++) {
// We use random data here for example. Change this to real data in your application.
// We use random data here for example. Change this to real data in your
// application.
for (int j = 0; j < input_num; j++) {
input[j] = Random(0, 1.0);
}
auto input_names = predictor->GetInputNames();
auto input_t = predictor->GetInputTensor(input_names[0]);
auto input_t = predictor->GetInputHandle(input_names[0]);
input_t->Reshape(input_shape);
input_t->copy_from_cpu(input.data());
input_t->CopyFromCpu(input.data());

// Run predictor to generate calibration table. Can be very time-consuming.
CHECK(predictor->ZeroCopyRun());
// Run predictor to generate calibration table. Can be very time-consuming.
CHECK(predictor->Run());
}
}

int main(int argc, char* argv[]) {
int main(int argc, char *argv[]) {
google::ParseCommandLineFlags(&argc, &argv, true);
auto predictor = CreatePredictor();
std::vector<int> input_shape = {FLAGS_batch_size, 3, 224, 224};
// Init input as 1.0 here for example. You can also load preprocessed real pictures to vectors as input.
std::vector<float> input_data(FLAGS_batch_size * 3 * 224 * 224, 1.0);
auto predictor = InitPredictor();
std::vector<int> input_shape = {FLAGS_batch_size, 3, 224, 224};
// Init input as 1.0 here for example. You can also load preprocessed real
// pictures to vectors as input.
std::vector<float> input_data(FLAGS_batch_size * 3 * 224 * 224, 1.0);
std::vector<float> out_data;
run(predictor.get(), input_data, input_shape, &out_data);
return 0;
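The calibration demo follows the same structure; here is a consolidated sketch of trt_gen_calib_table_test.cc after this change, assembled from the new-side lines of the hunks. The Random() helper is defined outside the shown hunks and the unused out_data parameter is dropped here, so treat both as illustrative simplifications:

```cpp
#include <functional>
#include <memory>
#include <numeric>
#include <random>
#include <vector>

#include <gflags/gflags.h>
#include <glog/logging.h>

#include "paddle/include/paddle_inference_api.h"

using paddle_infer::Config;
using paddle_infer::CreatePredictor;
using paddle_infer::PrecisionType;
using paddle_infer::Predictor;

DEFINE_string(model_file, "", "Path of the inference model file.");
DEFINE_string(params_file, "", "Path of the inference params file.");
DEFINE_string(model_dir, "", "Directory of the inference model.");
DEFINE_int32(batch_size, 1, "Batch size.");

// Stand-in for the demo's Random() helper (its definition is outside the diff).
float Random(float low, float high) {
  static std::default_random_engine engine;
  std::uniform_real_distribution<float> dist(low, high);
  return dist(engine);
}

// kInt8 with use_calib=true makes TensorRT collect calibration statistics.
std::shared_ptr<Predictor> InitPredictor() {
  Config config;
  if (FLAGS_model_dir != "") {
    config.SetModel(FLAGS_model_dir);
  } else {
    config.SetModel(FLAGS_model_file, FLAGS_params_file);
  }
  config.EnableUseGpu(500, 0);
  config.EnableTensorRtEngine(1 << 30, FLAGS_batch_size, 5,
                              PrecisionType::kInt8, false, true /*use_calib*/);
  return CreatePredictor(config);
}

// Feed 500 batches so the calibration table can be generated. Random data is
// only a placeholder; use real preprocessed data in a real workload.
void run(Predictor *predictor, std::vector<float> &input,
         const std::vector<int> &input_shape) {
  int input_num = std::accumulate(input_shape.begin(), input_shape.end(), 1,
                                  std::multiplies<int>());
  for (size_t i = 0; i < 500; i++) {
    for (int j = 0; j < input_num; j++) {
      input[j] = Random(0, 1.0);
    }
    auto input_names = predictor->GetInputNames();
    auto input_t = predictor->GetInputHandle(input_names[0]);
    input_t->Reshape(input_shape);
    input_t->CopyFromCpu(input.data());
    // Running the predictor generates the calibration table; this can be slow.
    CHECK(predictor->Run());
  }
}

int main(int argc, char *argv[]) {
  google::ParseCommandLineFlags(&argc, &argv, true);
  auto predictor = InitPredictor();
  std::vector<int> input_shape = {FLAGS_batch_size, 3, 224, 224};
  std::vector<float> input_data(FLAGS_batch_size * 3 * 224 * 224, 1.0);
  run(predictor.get(), input_data, input_shape);
  return 0;
}
```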
62 changes: 33 additions & 29 deletions c++/paddle-trt/trt_int8_test.cc
@@ -1,20 +1,25 @@
#include <numeric>
#include <chrono>
#include <iostream>
#include <memory>
#include <chrono>
#include <numeric>

#include <gflags/gflags.h>
#include <glog/logging.h>

#include "paddle/include/paddle_inference_api.h"

using paddle::AnalysisConfig;
using paddle_infer::Config;
using paddle_infer::Predictor;
using paddle_infer::CreatePredictor;
using paddle_infer::PrecisionType;

DEFINE_string(model_file, "", "Path of the inference model file.");
DEFINE_string(params_file, "", "Path of the inference params file.");
DEFINE_string(model_dir, "", "Directory of the inference model.");
DEFINE_int32(batch_size, 1, "Batch size.");
DEFINE_bool(use_calib, true, "Whether to use calib. Set to true if you are using TRT calibration; Set to false if you are using PaddleSlim quant models.");
DEFINE_bool(use_calib, true, "Whether to use calib. Set to true if you are "
"using TRT calibration; Set to false if you are "
"using PaddleSlim quant models.");

using Time = decltype(std::chrono::high_resolution_clock::now());
Time time() { return std::chrono::high_resolution_clock::now(); };
@@ -25,50 +30,49 @@ double time_diff(Time t1, Time t2) {
return counter.count() / 1000.0;
}

std::unique_ptr<paddle::PaddlePredictor> CreatePredictor() {
AnalysisConfig config;
std::shared_ptr<Predictor> InitPredictor() {
Config config;
if (FLAGS_model_dir != "") {
config.SetModel(FLAGS_model_dir);
} else {
config.SetModel(FLAGS_model_file,
FLAGS_params_file);
config.SetModel(FLAGS_model_file, FLAGS_params_file);
}
config.EnableUseGpu(500, 0);
// We use ZeroCopy, so we set config.SwitchUseFeedFetchOps(false) here.
config.SwitchUseFeedFetchOps(false);
config.EnableTensorRtEngine(1 << 30, FLAGS_batch_size, 5, AnalysisConfig::Precision::kInt8, false, FLAGS_use_calib);
return CreatePaddlePredictor(config);
config.EnableTensorRtEngine(1 << 30, FLAGS_batch_size, 5,
PrecisionType::kInt8, false, FLAGS_use_calib);
return CreatePredictor(config);
}

void run(paddle::PaddlePredictor *predictor,
const std::vector<float>& input,
const std::vector<int>& input_shape,
std::vector<float> *out_data) {
int input_num = std::accumulate(input_shape.begin(), input_shape.end(), 1, std::multiplies<int>());
void run(Predictor *predictor, const std::vector<float> &input,
const std::vector<int> &input_shape, std::vector<float> *out_data) {
int input_num = std::accumulate(input_shape.begin(), input_shape.end(), 1,
std::multiplies<int>());

auto input_names = predictor->GetInputNames();
auto input_t = predictor->GetInputTensor(input_names[0]);
auto input_t = predictor->GetInputHandle(input_names[0]);
input_t->Reshape(input_shape);
input_t->copy_from_cpu(input.data());
input_t->CopyFromCpu(input.data());

CHECK(predictor->ZeroCopyRun());
CHECK(predictor->Run());

auto output_names = predictor->GetOutputNames();
// there is only one output of Resnet50
auto output_t = predictor->GetOutputTensor(output_names[0]);
auto output_t = predictor->GetOutputHandle(output_names[0]);
std::vector<int> output_shape = output_t->shape();
int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1, std::multiplies<int>());
int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
std::multiplies<int>());

out_data->resize(out_num);
output_t->copy_to_cpu(out_data->data());
output_t->CopyToCpu(out_data->data());
}
int main(int argc, char* argv[]) {

int main(int argc, char *argv[]) {
google::ParseCommandLineFlags(&argc, &argv, true);
auto predictor = CreatePredictor();
std::vector<int> input_shape = {FLAGS_batch_size, 3, 224, 224};
// Init input as 1.0 here for example. You can also load preprocessed real pictures to vectors as input.
std::vector<float> input_data(FLAGS_batch_size * 3 * 224 * 224, 1.0);
auto predictor = InitPredictor();
std::vector<int> input_shape = {FLAGS_batch_size, 3, 224, 224};
// Init input as 1.0 here for example. You can also load preprocessed real
// pictures to vectors as input.
std::vector<float> input_data(FLAGS_batch_size * 3 * 224 * 224, 1.0);
std::vector<float> out_data;
run(predictor.get(), input_data, input_shape, &out_data);
// Print first 20 outputs
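trt_int8_test.cc differs from the FP32 demo above only in its predictor configuration: the precision becomes kInt8 and the last EnableTensorRtEngine argument is driven by --use_calib (true when loading a TensorRT calibration table, false for PaddleSlim quantized models such as the ResNet50_quant model in the README). A minimal sketch of just that configuration, taken from the new-side lines above; run() and main() match the FP32 sketch:

```cpp
#include <memory>

#include <gflags/gflags.h>

#include "paddle/include/paddle_inference_api.h"

using paddle_infer::Config;
using paddle_infer::CreatePredictor;
using paddle_infer::PrecisionType;
using paddle_infer::Predictor;

DEFINE_string(model_file, "", "Path of the inference model file.");
DEFINE_string(params_file, "", "Path of the inference params file.");
DEFINE_string(model_dir, "", "Directory of the inference model.");
DEFINE_int32(batch_size, 1, "Batch size.");
DEFINE_bool(use_calib, true,
            "Set to true when using a TRT calibration table; set to false for "
            "PaddleSlim quant models.");

// Int8 predictor: FLAGS_use_calib selects calibration-table mode vs.
// PaddleSlim-quantized-model mode.
std::shared_ptr<Predictor> InitPredictor() {
  Config config;
  if (FLAGS_model_dir != "") {
    config.SetModel(FLAGS_model_dir);
  } else {
    config.SetModel(FLAGS_model_file, FLAGS_params_file);
  }
  config.EnableUseGpu(500, 0);
  config.EnableTensorRtEngine(1 << 30, FLAGS_batch_size, 5,
                              PrecisionType::kInt8, false, FLAGS_use_calib);
  return CreatePredictor(config);
}
```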

