Commit

Merge pull request PaddlePaddle#37 from jiweibo/shrink_memory

[ShrinkMemory] Update shrink memory multi-thread demo.
jiweibo authored Nov 3, 2020
2 parents 229e40d + 7652cfe commit 09284c0
Showing 4 changed files with 146 additions and 37 deletions.
8 changes: 4 additions & 4 deletions c++/test/shrink_memory/README.md
@@ -7,6 +7,7 @@
### Part 2: **Compiling the samples**

The file `single_thread_test.cc` is a single-threaded inference sample that uses ShrinkMemory to reduce host/GPU memory usage (its inputs are fixed values; if you need to read data via OpenCV or other means, you will have to modify the program accordingly).
The file `thread_local_test.cc` is a multi-threaded inference sample that uses the thread_local allocator together with ShrinkMemory to reduce host/GPU memory usage.
The file `multi_thread_test.cc` is a multi-threaded inference sample that uses ShrinkMemory to reduce host/GPU memory usage.
The file `CMakeLists.txt` is the build script.
The script `run_impl.sh` configures the third-party libraries and the precompiled inference library.
Expand All @@ -18,7 +19,7 @@
Open `run_impl.sh` and modify the following settings:

```shell
# choose single_thread_test or multi_thread_test as needed
# choose single_thread_test, multi_thread_test, or thread_local_test as needed
DEMO_NAME=single_thread_test

# check version.txt in the precompiled library to decide whether to enable the following three flags
@@ -45,13 +46,12 @@ CUDA_LIB=/usr/local/cuda/lib64
cd build
# run the sample
./single_thread_test --model_dir ${YOUR_MODEL_PATH} --use_gpu
# ./multi_thread_test --model_dir ${YOUR_MODEL_PATH} --use_gpu
# ./multi_thread_test --model_dir ${YOUR_MODEL_PATH} --use_gpu --thread_num 2
# ./thread_local_test --model_dir ${YOUR_MODEL_PATH} --use_gpu
```

While the sample is running, watch the GPU memory or CPU memory usage as prompted. When a run uses a very large batch_size, the memory pool grows and the application's GPU/host memory footprint stays high; calling ShrinkMemory explicitly releases the memory pool.

Note: the multi-threaded test sample must call EnableGpuMultiStream() to select the thread_local allocator so that the ShrinkMemory operation is supported.
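
For reference, the call sequence this note describes boils down to the following. This is a minimal single-predictor sketch rather than one of the demo programs; the model path, pool size, and shapes are placeholders taken from the demo defaults.

```cpp
// Minimal sketch (not taken from the demo sources): grow the memory pool with a
// large batch, then release it explicitly.
#include "paddle/include/paddle_inference_api.h"

#include <vector>

int main() {
  paddle_infer::Config config;
  config.SetModel("./mobilenetv1/model", "./mobilenetv1/params");
  config.EnableUseGpu(500, 0);    // initial GPU memory pool (MB), device 0
  config.EnableGpuMultiStream();  // thread_local allocator (required in the multi-thread case)
  auto predictor = paddle_infer::CreatePredictor(config);

  // One pass with a large batch; the memory pool grows to fit it.
  const int batch = 40, c = 3, h = 224, w = 224;
  std::vector<float> input(batch * c * h * w, 0.f);
  auto in = predictor->GetInputHandle(predictor->GetInputNames()[0]);
  in->Reshape({batch, c, h, w});
  in->CopyFromCpu(input.data());
  predictor->Run();

  // Explicitly return the now-oversized memory pool.
  predictor->ShrinkMemory();
  return 0;
}
```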

### More links
- [Paddle Inference Quick Start!]()
- [Paddle Inference Python API usage]()
52 changes: 20 additions & 32 deletions c++/test/shrink_memory/multi_thread_test.cc
@@ -1,4 +1,3 @@
#include "helper.h"
#include "paddle/include/paddle_inference_api.h"
#include <chrono>
#include <gflags/gflags.h>
@@ -9,41 +8,23 @@
#include <vector>

DEFINE_string(model_dir, "./mobilenetv1", "model directory.");
DEFINE_int32(thread_num, 1, "thread num");
DEFINE_bool(use_gpu, false, "use gpu.");
DEFINE_bool(test_leaky, false,
"run 1000 times, and observe whether leaky memory or not.");

const size_t thread_num = 2;
paddle::inference::Timer timer_sum;
paddle::inference::Barrier barrier_init(thread_num);
paddle::inference::Barrier barrier_warmup(thread_num);

namespace paddle_infer {

void PrepareConfig(Config *config) {
config->SetModel(FLAGS_model_dir + "/model", FLAGS_model_dir + "/params");
if (FLAGS_use_gpu) {
config->EnableUseGpu(500, 0);
}
// switch to thread_local allocator.
config->EnableGpuMultiStream();
}

void Run(int thread_id) {
Config config;
PrepareConfig(&config);

// create predictor
static std::mutex mutex;

std::shared_ptr<Predictor> predictor;
{
std::unique_lock<std::mutex> lock(mutex);
predictor = CreatePredictor(config);
}
void Run(std::shared_ptr<Predictor> predictor, int thread_id) {

auto run_one_loop = [&](int batch_size) {
// prepare inputs.
int channels = 3;
int height = 224;
int width = 224;
@@ -73,10 +54,6 @@ void Run(int thread_id) {
<< " mean val: " << mean_val / output_num;
};

barrier_init.Wait();
run_one_loop(1);
barrier_warmup.Wait();

auto pause = [](const std::string &hint) {
if (FLAGS_test_leaky) {
return;
@@ -93,27 +70,38 @@ void Run(int thread_id) {
for (int i = 0; i < run_times; ++i) {
run_one_loop(40);
pause("Pause, you can view the GPU memory usage, please enter any "
"character to continue running.");
"character to continue running. thread_id is " +
std::to_string(thread_id));

// release memory pool.
predictor->ShrinkMemory();
pause("Pause, ShrinkMemory has been called, please observe the changes of "
"GPU memory.");
"GPU memory. thread_idis " +
std::to_string(thread_id));

run_one_loop(1);
pause("Pause, you can view the GPU memory usage, please enter any "
"character to continue running.");
"character to continue running. thread_id is " +
std::to_string(thread_id));
}
}
}

int main(int argc, char **argv) {
google::ParseCommandLineFlags(&argc, &argv, true);

paddle_infer::Config config;
paddle_infer::PrepareConfig(&config);
auto main_predictor = paddle_infer::CreatePredictor(config);
std::vector<decltype(main_predictor)> predictors;
for (int i = 0; i < FLAGS_thread_num; ++i) {
predictors.emplace_back(std::move(main_predictor->Clone()));
}

std::vector<std::thread> threads;
for (size_t i = 0; i < thread_num; ++i) {
threads.emplace_back([&, i]() { paddle_infer::Run(i); });
for (int i = 0; i < FLAGS_thread_num; ++i) {
threads.emplace_back(paddle_infer::Run, predictors[i], i);
}
for (size_t i = 0; i < thread_num; ++i) {
for (int i = 0; i < FLAGS_thread_num; ++i) {
threads[i].join();
}
LOG(INFO) << "Run done";
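
The net effect of this diff: instead of each worker thread building its own `Config` (with `EnableGpuMultiStream()`) and calling `CreatePredictor`, the main thread now creates one predictor and hands each worker a `Clone()` of it. Below is a condensed sketch of the new flow, not part of the commit itself; the demo's batch loop and interactive pauses are elided.

```cpp
// Sketch of the Clone-per-thread pattern introduced by this commit; input
// preparation and pauses from run_one_loop are omitted for brevity.
#include "paddle/include/paddle_inference_api.h"

#include <memory>
#include <thread>
#include <vector>

void Worker(std::shared_ptr<paddle_infer::Predictor> predictor, int thread_id) {
  // ... run batches with this predictor (see run_one_loop in the demo) ...
  predictor->ShrinkMemory();  // release this predictor's memory pool
}

int main() {
  paddle_infer::Config config;
  config.SetModel("./mobilenetv1/model", "./mobilenetv1/params");
  config.EnableUseGpu(500, 0);
  auto main_predictor = paddle_infer::CreatePredictor(config);

  const int thread_num = 2;  // FLAGS_thread_num in the demo
  std::vector<std::shared_ptr<paddle_infer::Predictor>> predictors;
  for (int i = 0; i < thread_num; ++i) {
    predictors.emplace_back(main_predictor->Clone());  // per-thread predictor
  }

  std::vector<std::thread> threads;
  for (int i = 0; i < thread_num; ++i) {
    threads.emplace_back(Worker, predictors[i], i);
  }
  for (auto &t : threads) {
    t.join();
  }
  return 0;
}
```
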
3 changes: 2 additions & 1 deletion c++/test/shrink_memory/run_impl.sh
@@ -4,9 +4,10 @@ mkdir -p build
cd build
rm -rf *

# set DEMO_NAME to single_thread_test or multi_thread_test
# set DEMO_NAME to single_thread_test, multi_thread_test, or thread_local_test
DEMO_NAME=single_thread_test
#DEMO_NAME=multi_thread_test
#DEMO_NAME=thread_local_test

WITH_MKL=ON
WITH_GPU=ON
120 changes: 120 additions & 0 deletions c++/test/shrink_memory/thread_local_test.cc
@@ -0,0 +1,120 @@
#include "helper.h"
#include "paddle/include/paddle_inference_api.h"
#include <chrono>
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <iostream>
#include <numeric>
#include <thread>
#include <vector>

DEFINE_string(model_dir, "./mobilenetv1", "model directory.");
DEFINE_bool(use_gpu, false, "use gpu.");
DEFINE_bool(test_leaky, false,
"run 1000 times, and observe whether leaky memory or not.");

const size_t thread_num = 2;
paddle::inference::Timer timer_sum;
paddle::inference::Barrier barrier_init(thread_num);
paddle::inference::Barrier barrier_warmup(thread_num);

namespace paddle_infer {

void PrepareConfig(Config *config) {
config->SetModel(FLAGS_model_dir + "/model", FLAGS_model_dir + "/params");
if (FLAGS_use_gpu) {
config->EnableUseGpu(500, 0);
}
// switch to thread_local allocator.
config->EnableGpuMultiStream();
}

void Run(int thread_id) {
Config config;
PrepareConfig(&config);

// create predictor
static std::mutex mutex;

std::shared_ptr<Predictor> predictor;
{
std::unique_lock<std::mutex> lock(mutex);
predictor = CreatePredictor(config);
}

auto run_one_loop = [&](int batch_size) {
// prepare inputs.
int channels = 3;
int height = 224;
int width = 224;
int input_num = channels * height * width * batch_size;
std::vector<float> in_data(input_num, 0);
for (int i = 0; i < input_num; ++i) {
in_data[i] = i % 255 * 0.1;
}
auto in_names = predictor->GetInputNames();
auto in_handle = predictor->GetInputHandle(in_names[0]);
in_handle->Reshape({batch_size, channels, height, width});
in_handle->CopyFromCpu(in_data.data());
CHECK(predictor->Run());
auto out_names = predictor->GetOutputNames();
auto out_handle = predictor->GetOutputHandle(out_names[0]);
std::vector<float> out_data;
std::vector<int> temp_shape = out_handle->shape();
int output_num = std::accumulate(temp_shape.begin(), temp_shape.end(), 1,
std::multiplies<int>());
out_data.resize(output_num);
out_handle->CopyToCpu(out_data.data());
float mean_val = 0;
for (size_t j = 0; j < output_num; ++j) {
mean_val += out_data[j];
}
LOG(INFO) << "thread_id: " << thread_id << " batch_size: " << batch_size
<< " mean val: " << mean_val / output_num;
};

barrier_init.Wait();
run_one_loop(1);
barrier_warmup.Wait();

auto pause = [](const std::string &hint) {
if (FLAGS_test_leaky) {
return;
}
std::string temp;
LOG(INFO) << hint;
std::getline(std::cin, temp);
};

int run_times = 1;
if (FLAGS_test_leaky) {
run_times = 100;
}
for (int i = 0; i < run_times; ++i) {
run_one_loop(40);
pause("Pause, you can view the GPU memory usage, please enter any "
"character to continue running.");

// release memory pool.
predictor->ShrinkMemory();
pause("Pause, ShrinkMemory has been called, please observe the changes of "
"GPU memory.");

run_one_loop(1);
pause("Pause, you can view the GPU memory usage, please enter any "
"character to continue running.");
}
}
}

int main(int argc, char **argv) {
google::ParseCommandLineFlags(&argc, &argv, true);
std::vector<std::thread> threads;
for (size_t i = 0; i < thread_num; ++i) {
threads.emplace_back([&, i]() { paddle_infer::Run(i); });
}
for (size_t i = 0; i < thread_num; ++i) {
threads[i].join();
}
LOG(INFO) << "Run done";
}
