Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

【Hackathon 7th No.21】为 Paddle 新增 reset_max_memory_reserved/reset_max_memory_allocated API -part #70032

Merged
merged 9 commits into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions paddle/fluid/pybind/pybind.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2424,8 +2424,13 @@ All parameter, weight, gradient are variables in Paddle.
m.def("device_memory_stat_current_value",
memory::DeviceMemoryStatCurrentValue);
m.def("device_memory_stat_peak_value", memory::DeviceMemoryStatPeakValue);
m.def("device_memory_stat_reset_peak_value",
memory::DeviceMemoryStatResetPeakValue);

m.def("host_memory_stat_current_value", memory::HostMemoryStatCurrentValue);
m.def("host_memory_stat_peak_value", memory::HostMemoryStatPeakValue);
m.def("host_memory_stat_reset_peak_value",
memory::HostMemoryStatResetPeakValue);
m.def(
"run_cmd",
[](const std::string &cmd,
Expand Down
12 changes: 12 additions & 0 deletions paddle/phi/core/memory/stats.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ class StatRegistry {
GetStat(stat_type, dev_id)->Update(increment);
}

void ResetPeakValue(const std::string& stat_type, int dev_id) {
GetStat(stat_type, dev_id)->ResetPeakValue();
}

void Register(const std::string& stat_type, int dev_id, StatBase* stat) {
std::lock_guard<SpinLock> lock_guard(stat_map_lock_);
stat_map_[GetStatKey(stat_type, dev_id)] = stat;
Expand Down Expand Up @@ -93,6 +97,10 @@ void DeviceMemoryStatUpdate(const std::string& stat_type,
StatRegistry::GetInstance()->Update("Device" + stat_type, dev_id, increment);
}

// Reset the peak value of a device-memory stat (e.g. "Allocated",
// "Reserved") on device `dev_id` to its current value.
void DeviceMemoryStatResetPeakValue(const std::string& stat_type, int dev_id) {
  // Device stats are registered under a "Device"-prefixed key.
  const std::string key = "Device" + stat_type;
  StatRegistry::GetInstance()->ResetPeakValue(key, dev_id);
}

int64_t HostMemoryStatCurrentValue(const std::string& stat_type, int dev_id) {
return StatRegistry::GetInstance()->GetCurrentValue("Host" + stat_type,
dev_id);
Expand All @@ -108,6 +116,10 @@ void HostMemoryStatUpdate(const std::string& stat_type,
StatRegistry::GetInstance()->Update("Host" + stat_type, dev_id, increment);
}

// Reset the peak value of a host-memory stat (e.g. "Allocated",
// "Reserved") for slot `dev_id` to its current value.
void HostMemoryStatResetPeakValue(const std::string& stat_type, int dev_id) {
  // Host stats are registered under a "Host"-prefixed key.
  const std::string key = "Host" + stat_type;
  StatRegistry::GetInstance()->ResetPeakValue(key, dev_id);
}

void LogDeviceMemoryStats(const phi::Place& place, const std::string& op_name) {
if (FLAGS_log_memory_stats && phi::is_gpu_place(place)) {
VLOG(0) << "After launching op_name: " << op_name << ", "
Expand Down
23 changes: 23 additions & 0 deletions paddle/phi/core/memory/stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ class StatBase {
virtual int64_t GetCurrentValue() = 0;
virtual int64_t GetPeakValue() = 0;
virtual void Update(int64_t) = 0;
virtual void ResetPeakValue() = 0;

private:
DISABLE_COPY_AND_ASSIGN(StatBase);
Expand Down Expand Up @@ -112,6 +113,22 @@ class Stat : public StatBase {
}
}

// Reset the aggregated peak value so it equals the current value, and
// snap every thread-local peak down to that thread's current value so
// subsequent Update() calls recompute the peak from this point on.
// NOTE(review): this is not atomic w.r.t. a concurrent Update(); an
// update racing between the load and the stores may briefly be missed —
// presumably acceptable for memory statistics, confirm with callers.
void ResetPeakValue() override {
  int64_t current_value = GetCurrentValue();
  peak_value_.store(current_value, std::memory_order_relaxed);

  // Iterate by reference: the original `for (auto pair : ...)` copied
  // each map entry per iteration (clang-tidy performance-for-range-copy).
  auto thread_local_stats =
      ThreadDataRegistry<ThreadLocalStatType>::GetInstance()
          .GetAllThreadDataByRef();

  for (auto& pair : thread_local_stats) {
    // reference_wrapper::get() yields the thread's mutable stat object.
    pair.second.get().peak = pair.second.get().current;
  }

  VLOG(8) << "Reset peak_value to current_value = " << current_value;
}

private:
Stat() {}
~Stat() {}
Expand All @@ -128,12 +145,14 @@ int64_t DeviceMemoryStatPeakValue(const std::string& stat_type, int dev_id);
void DeviceMemoryStatUpdate(const std::string& stat_type,
int dev_id,
int64_t increment);
void DeviceMemoryStatResetPeakValue(const std::string& stat_type, int dev_id);

int64_t HostMemoryStatCurrentValue(const std::string& stat_type, int dev_id);
int64_t HostMemoryStatPeakValue(const std::string& stat_type, int dev_id);
void HostMemoryStatUpdate(const std::string& stat_type,
int dev_id,
int64_t increment);
void HostMemoryStatResetPeakValue(const std::string& stat_type, int dev_id);

void LogDeviceMemoryStats(const phi::Place& place, const std::string& op_name);

Expand Down Expand Up @@ -179,6 +198,8 @@ void LogDeviceMemoryStats(const phi::Place& place, const std::string& op_name);
DEVICE_MEMORY_STAT_FUNC(item, id, GetPeakValue)
#define DEVICE_MEMORY_STAT_UPDATE(item, id, increment) \
DEVICE_MEMORY_STAT_FUNC(item, id, Update, increment)
#define DEVICE_MEMORY_STAT_RESET_PEAK_VALUE(item, id) \
DEVICE_MEMORY_STAT_FUNC(item, id, ResetPeakValue)

#define HOST_MEMORY_STAT_FUNC(item, id, func, ...) \
[&] { \
Expand All @@ -199,6 +220,8 @@ void LogDeviceMemoryStats(const phi::Place& place, const std::string& op_name);
HOST_MEMORY_STAT_FUNC(item, id, GetPeakValue)
#define HOST_MEMORY_STAT_UPDATE(item, id, increment) \
HOST_MEMORY_STAT_FUNC(item, id, Update, increment)
#define HOST_MEMORY_STAT_RESET_PEAK_VALUE(item, id) \
HOST_MEMORY_STAT_FUNC(item, id, ResetPeakValue)

#define DEVICE_MEMORY_STAT_DECLARE_WITH_ID(item, id) \
struct DeviceMemoryStat##item##id : public ThreadLocalStatBase {}
Expand Down
62 changes: 62 additions & 0 deletions python/paddle/device/cuda/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@
'get_device_properties',
'get_device_name',
'get_device_capability',
'reset_max_memory_allocated',
'reset_max_memory_reserved',
]


Expand Down Expand Up @@ -298,6 +300,66 @@ def max_memory_reserved(device: _CudaPlaceLike | None = None) -> int:
return core.device_memory_stat_peak_value("Reserved", device_id)


def reset_max_memory_allocated(device: _CudaPlaceLike | None = None) -> None:
    '''
    Reset the peak size of GPU memory that is allocated to tensor of the given device.

    Args:
        device(paddle.CUDAPlace|int|str|None, optional): The device, the id of the device or
            the string name of device like 'gpu:x'. If device is None, the device is the current device.
            Default: None.

    Examples:
        .. code-block:: python

            >>> # doctest: +REQUIRES(env:GPU)
            >>> import paddle
            >>> paddle.device.set_device('gpu')

            >>> paddle.device.cuda.reset_max_memory_allocated(paddle.CUDAPlace(0))
            >>> paddle.device.cuda.reset_max_memory_allocated(0)
            >>> paddle.device.cuda.reset_max_memory_allocated("gpu:0")
    '''
    api_name = "paddle.device.cuda.reset_max_memory_allocated"
    # This API is meaningless on CPU-only builds; fail fast with guidance.
    if not core.is_compiled_with_cuda():
        raise ValueError(
            f"The API {api_name} is not supported in CPU-only PaddlePaddle. Please reinstall PaddlePaddle with GPU support to call this API."
        )
    # Normalize CUDAPlace / int / "gpu:x" / None into a plain device id,
    # then reset the "Allocated" peak counter for that device.
    device_id = extract_cuda_device_id(device, op_name=api_name)
    core.device_memory_stat_reset_peak_value("Allocated", device_id)


def reset_max_memory_reserved(device: _CudaPlaceLike | None = None) -> None:
    '''
    Reset the peak size of GPU memory that is held by the allocator of the given device.

    Args:
        device(paddle.CUDAPlace|int|str|None, optional): The device, the id of the device or
            the string name of device like 'gpu:x'. If device is None, the device is the current device.
            Default: None.

    Examples:
        .. code-block:: python

            >>> # doctest: +REQUIRES(env:GPU)
            >>> import paddle
            >>> paddle.device.set_device('gpu')

            >>> paddle.device.cuda.reset_max_memory_reserved(paddle.CUDAPlace(0))
            >>> paddle.device.cuda.reset_max_memory_reserved(0)
            >>> paddle.device.cuda.reset_max_memory_reserved("gpu:0")
    '''
    api_name = "paddle.device.cuda.reset_max_memory_reserved"
    # This API is meaningless on CPU-only builds; fail fast with guidance.
    if not core.is_compiled_with_cuda():
        raise ValueError(
            f"The API {api_name} is not supported in CPU-only PaddlePaddle. Please reinstall PaddlePaddle with GPU support to call this API."
        )
    # Normalize CUDAPlace / int / "gpu:x" / None into a plain device id,
    # then reset the "Reserved" peak counter for that device.
    device_id = extract_cuda_device_id(device, op_name=api_name)
    core.device_memory_stat_reset_peak_value("Reserved", device_id)


def memory_allocated(device: _CudaPlaceLike | None = None) -> int:
'''
Return the current size of gpu memory that is allocated to tensor of the given device.
Expand Down
30 changes: 27 additions & 3 deletions test/cpp/fluid/memory/stats_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,18 @@ class StatsTest : public ::testing::Test {
void SetFunc(
std::function<void(const std::string, int, int64_t)> update_func,
std::function<int64_t(const std::string, int)> current_value_func,
std::function<int64_t(const std::string, int)> peak_value_func) {
std::function<int64_t(const std::string, int)> peak_value_func,
std::function<void(const std::string, int)> reset_peak_value_func) {
update_func_ = update_func;
current_value_func_ = current_value_func;
peak_value_func_ = peak_value_func;
reset_peak_value_func_ = reset_peak_value_func;
}

void RunTests() {
MultiThreadReadWriteTest();
PeakValueTest();
ResetPeakValueTest();
}

private:
Expand Down Expand Up @@ -94,6 +97,18 @@ class StatsTest : public ::testing::Test {
EXPECT_EQ(peak_value_func_(stat_type_, 0), peak_value);
}

void ResetPeakValueTest() {
for (int64_t data : datas_) {
update_func_(stat_type_, 0, data);

EXPECT_GE(peak_value_func_(stat_type_, 0),
current_value_func_(stat_type_, 0));
reset_peak_value_func_(stat_type_, 0);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

在这个测试中,这一步如果reset不成功,是不是后面的检查也是会通过的?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

如果reset不成功,下一行(107行)的检查应该无法通过。

  void ResetPeakValueTest() {
    for (int64_t data : datas_) {
      update_func_(stat_type_, 0, data);

      EXPECT_GE(peak_value_func_(stat_type_, 0),
                current_value_func_(stat_type_, 0));
      // reset_peak_value_func_(stat_type_, 0);
      printf("data: %ld, Peak Value: %ld, Current Value: %ld\n",data, peak_value_func_(stat_type_, 0), current_value_func_(stat_type_, 0));
      EXPECT_EQ(peak_value_func_(stat_type_, 0),
                current_value_func_(stat_type_, 0));
    }
  }

如果将reset_peak_value_func_函数注释,测试将无法通过。下面为部分运行结果。

115: Test timeout computed to be: 10000000
115: [==========] Running 4 tests from 1 test case.
115: [----------] Global test environment set-up.
115: [----------] 4 tests from StatsTest
115: [ RUN      ] StatsTest.DeviceAllocatedTest
115: data: 543149808935355, Peak Value: 45703145873829393, Current Value: 45703145873829393
115: data: 634698327471328, Peak Value: 46337844201300721, Current Value: 46337844201300721
115: data: 706215795436611, Peak Value: 47044059996737332, Current Value: 47044059996737332
115: data: 577939367795333, Peak Value: 47621999364532665, Current Value: 47621999364532665
115: data: 419479490054362, Peak Value: 48041478854587027, Current Value: 48041478854587027
115: data: 21975227714595, Peak Value: 48063454082301622, Current Value: 48063454082301622
115: data: 812939817942250, Peak Value: 48876393900243872, Current Value: 48876393900243872
115: data: 984428837942082, Peak Value: 49860822738185954, Current Value: 49860822738185954
115: data: 537304104446806, Peak Value: 50398126842632760, Current Value: 50398126842632760
115: data: 685008544452453, Peak Value: 51083135387085213, Current Value: 51083135387085213
115: data: 563352858161268, Peak Value: 51646488245246481, Current Value: 51646488245246481
115: data: 690143831596330, Peak Value: 52336632076842811, Current Value: 52336632076842811
115: data: 964829938186077, Peak Value: 53301462015028888, Current Value: 53301462015028888
115: data: 476984078018245, Peak Value: 53778446093047133, Current Value: 53778446093047133
115: data: 804403365180177, Peak Value: 54582849458227310, Current Value: 54582849458227310
115: data: -57918691189304, Peak Value: 54582849458227310, Current Value: 54524930767038006
115: /home/aistudio/test/Paddle/test/cpp/fluid/memory/stats_test.cc:109: Failure
115: Expected equality of these values:
115:   peak_value_func_(stat_type_, 0)
115:     Which is: 54582849458227310
115:   current_value_func_(stat_type_, 0)
115:     Which is: 54524930767038006
115: data: 947611269236893, Peak Value: 55472542036274899, Current Value: 55472542036274899
115: data: 752188963801927, Peak Value: 56224731000076826, Current Value: 56224731000076826
115: data: 710946451346683, Peak Value: 56935677451423509, Current Value: 56935677451423509
115: data: -49226452527666, Peak Value: 56935677451423509, Current Value: 56886450998895843
115: /home/aistudio/test/Paddle/test/cpp/fluid/memory/stats_test.cc:109: Failure
115: Expected equality of these values:
115:   peak_value_func_(stat_type_, 0)
115:     Which is: 56935677451423509
115:   current_value_func_(stat_type_, 0)
115:     Which is: 56886450998895843
115: data: -59049377393968, Peak Value: 56935677451423509, Current Value: 56827401621501875
115: /home/aistudio/test/Paddle/test/cpp/fluid/memory/stats_test.cc:109: Failure
115: Expected equality of these values:
115:   peak_value_func_(stat_type_, 0)
115:     Which is: 56935677451423509
115:   current_value_func_(stat_type_, 0)
115:     Which is: 56827401621501875
115: data: 14128239868858, Peak Value: 56935677451423509, Current Value: 56841529861370733
115: /home/aistudio/test/Paddle/test/cpp/fluid/memory/stats_test.cc:109: Failure
115: Expected equality of these values:
115:   peak_value_func_(stat_type_, 0)
115:     Which is: 56935677451423509
115:   current_value_func_(stat_type_, 0)
115:     Which is: 56841529861370733
115: data: 463298869064035, Peak Value: 57304828730434768, Current Value: 57304828730434768

EXPECT_EQ(peak_value_func_(stat_type_, 0),
current_value_func_(stat_type_, 0));
}
}

std::string stat_type_;
std::vector<int64_t> datas_{
543149808935355, 634698327471328, 706215795436611, 577939367795333,
Expand Down Expand Up @@ -125,13 +140,15 @@ class StatsTest : public ::testing::Test {
std::function<void(const std::string, int, int64_t)> update_func_;
std::function<int64_t(const std::string, int)> current_value_func_;
std::function<int64_t(const std::string, int)> peak_value_func_;
std::function<void(const std::string, int)> reset_peak_value_func_;
};

TEST_F(StatsTest, DeviceAllocatedTest) {
SetStatType("Allocated");
SetFunc(DeviceMemoryStatUpdate,
DeviceMemoryStatCurrentValue,
DeviceMemoryStatPeakValue);
DeviceMemoryStatPeakValue,
DeviceMemoryStatResetPeakValue);
RunTests();
}

Expand All @@ -146,6 +163,9 @@ TEST_F(StatsTest, DeviceReservedMacroTest) {
},
[](const std::string stat_type, int id) {
return DEVICE_MEMORY_STAT_PEAK_VALUE(Reserved, id);
},
[](const std::string stat_type, int id) {
return DEVICE_MEMORY_STAT_RESET_PEAK_VALUE(Reserved, id);
});
RunTests();
}
Expand All @@ -161,6 +181,9 @@ TEST_F(StatsTest, HostAllocatedMacroTest) {
},
[](const std::string stat_type, int id) {
return HOST_MEMORY_STAT_PEAK_VALUE(Allocated, id);
},
[](const std::string stat_type, int id) {
return HOST_MEMORY_STAT_RESET_PEAK_VALUE(Allocated, id);
});
RunTests();
}
Expand All @@ -169,7 +192,8 @@ TEST_F(StatsTest, HostReservedTest) {
SetStatType("Reserved");
SetFunc(HostMemoryStatUpdate,
HostMemoryStatCurrentValue,
HostMemoryStatPeakValue);
HostMemoryStatPeakValue,
HostMemoryStatResetPeakValue);
RunTests();
}

Expand Down
89 changes: 89 additions & 0 deletions test/legacy_test/test_cuda_reset_max_memory_allocated.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

import paddle
from paddle.base import core
from paddle.device.cuda import (
device_count,
max_memory_allocated,
memory_allocated,
reset_max_memory_allocated,
)


class TestResetMaxMemoryAllocated(unittest.TestCase):
    # Unit tests for paddle.device.cuda.reset_max_memory_allocated.
    # All tests are no-ops on CPU-only builds except the exception test,
    # which then checks that calling the API raises ValueError.

    def func_test_reset_max_memory_allocated(self, device=None):
        # Helper (not auto-discovered: no "test_" prefix) that exercises
        # one device identifier form (CUDAPlace / int / "gpu:x" string).
        if core.is_compiled_with_cuda():
            alloc_time = 100
            max_alloc_size = 10000
            for i in range(alloc_time):
                # first alloc
                # Large allocation establishes a high peak; shape is a
                # 0-d random tensor in [max_alloc_size, 2*max_alloc_size).
                shape = paddle.randint(
                    low=max_alloc_size, high=max_alloc_size * 2
                )
                tensor = paddle.zeros(shape)
                peak_memory_allocated_size_first = max_memory_allocated(device)

                del shape
                del tensor

                # second alloc
                # Strictly smaller allocation, so after the reset the new
                # peak must be below the first peak.
                shape = paddle.randint(low=0, high=max_alloc_size)
                tensor = paddle.zeros(shape)

                # reset peak memory stats
                reset_max_memory_allocated(device)

                # After reset: peak == current allocated, and the new peak
                # is less than the pre-reset (large-alloc) peak.
                peak_memory_allocated_size_second = max_memory_allocated(device)
                self.assertEqual(
                    peak_memory_allocated_size_second, memory_allocated(device)
                )
                self.assertLess(
                    peak_memory_allocated_size_second,
                    peak_memory_allocated_size_first,
                )

                del shape
                del tensor

    def test_reset_max_memory_allocated_for_all_places(self):
        # Run the helper on every visible GPU, covering all three
        # accepted device-identifier forms.
        if core.is_compiled_with_cuda():
            gpu_num = device_count()
            for i in range(gpu_num):
                paddle.device.set_device("gpu:" + str(i))
                self.func_test_reset_max_memory_allocated(core.CUDAPlace(i))
                self.func_test_reset_max_memory_allocated(i)
                self.func_test_reset_max_memory_allocated("gpu:" + str(i))

    def test_reset_max_memory_allocated_exception(self):
        # Invalid device identifiers must raise; exact exception type
        # varies per input (hence BaseException with the B017 waiver).
        if core.is_compiled_with_cuda():
            wrong_device = [
                core.CPUPlace(),
                device_count() + 1,
                -2,
                0.5,
                "gpu1",
            ]
            for device in wrong_device:
                with self.assertRaises(BaseException):  # noqa: B017
                    reset_max_memory_allocated(device)
        else:
            # CPU-only build: the API itself must raise ValueError.
            with self.assertRaises(ValueError):
                reset_max_memory_allocated()


if __name__ == "__main__":
unittest.main()
Loading