Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

【Hackathon 7th No.21】为 Paddle 新增 reset_max_memory_reserved/reset_max_memory_allocated API -part #70032

Merged
merged 9 commits into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions paddle/fluid/pybind/pybind.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2424,8 +2424,26 @@ All parameter, weight, gradient are variables in Paddle.
m.def("device_memory_stat_current_value",
memory::DeviceMemoryStatCurrentValue);
m.def("device_memory_stat_peak_value", memory::DeviceMemoryStatPeakValue);
m.def("device_memory_stat_reset_peak_value",
memory::DeviceMemoryStatResetPeakValue);

m.def("device_memory_stats", [](int dev_id) {
py::dict dict;
dict["memory.allocated.current"] =
memory::DeviceMemoryStatCurrentValue("Allocated", dev_id);
dict["memory.allocated.peak"] =
memory::DeviceMemoryStatPeakValue("Allocated", dev_id);
dict["memory.reserved.current"] =
memory::DeviceMemoryStatCurrentValue("Reserved", dev_id);
dict["memory.reserved.peak"] =
memory::DeviceMemoryStatPeakValue("Reserved", dev_id);
return dict;
});

m.def("host_memory_stat_current_value", memory::HostMemoryStatCurrentValue);
m.def("host_memory_stat_peak_value", memory::HostMemoryStatPeakValue);
m.def("host_memory_stat_reset_peak_value",
memory::HostMemoryStatResetPeakValue);
m.def(
"run_cmd",
[](const std::string &cmd,
Expand Down
12 changes: 12 additions & 0 deletions paddle/phi/core/memory/stats.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ class StatRegistry {
GetStat(stat_type, dev_id)->Update(increment);
}

// Rewind the peak value of the stat identified by (stat_type, dev_id)
// back to its current value.
void ResetPeakValue(const std::string& stat_type, int dev_id) {
  auto* stat = GetStat(stat_type, dev_id);
  stat->ResetPeakValue();
}

void Register(const std::string& stat_type, int dev_id, StatBase* stat) {
std::lock_guard<SpinLock> lock_guard(stat_map_lock_);
stat_map_[GetStatKey(stat_type, dev_id)] = stat;
Expand Down Expand Up @@ -93,6 +97,10 @@ void DeviceMemoryStatUpdate(const std::string& stat_type,
StatRegistry::GetInstance()->Update("Device" + stat_type, dev_id, increment);
}

// Reset the peak value of a device memory stat (e.g. stat_type
// "Allocated" or "Reserved") for the given device id.
void DeviceMemoryStatResetPeakValue(const std::string& stat_type, int dev_id) {
  const std::string full_name = "Device" + stat_type;
  StatRegistry::GetInstance()->ResetPeakValue(full_name, dev_id);
}

int64_t HostMemoryStatCurrentValue(const std::string& stat_type, int dev_id) {
return StatRegistry::GetInstance()->GetCurrentValue("Host" + stat_type,
dev_id);
Expand All @@ -108,6 +116,10 @@ void HostMemoryStatUpdate(const std::string& stat_type,
StatRegistry::GetInstance()->Update("Host" + stat_type, dev_id, increment);
}

// Reset the peak value of a host memory stat (e.g. stat_type
// "Allocated" or "Reserved") for the given device id.
void HostMemoryStatResetPeakValue(const std::string& stat_type, int dev_id) {
  const std::string full_name = "Host" + stat_type;
  StatRegistry::GetInstance()->ResetPeakValue(full_name, dev_id);
}

void LogDeviceMemoryStats(const phi::Place& place, const std::string& op_name) {
if (FLAGS_log_memory_stats && phi::is_gpu_place(place)) {
VLOG(0) << "After launching op_name: " << op_name << ", "
Expand Down
23 changes: 23 additions & 0 deletions paddle/phi/core/memory/stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ class StatBase {
virtual int64_t GetCurrentValue() = 0;
virtual int64_t GetPeakValue() = 0;
virtual void Update(int64_t) = 0;
virtual void ResetPeakValue() = 0;

private:
DISABLE_COPY_AND_ASSIGN(StatBase);
Expand Down Expand Up @@ -112,6 +113,22 @@ class Stat : public StatBase {
}
}

// Rewind the tracked peak value to the current value: the global
// atomic peak is overwritten, and every registered thread-local peak
// is set to that thread's current value, so later peak queries only
// reflect updates made after this call.
// NOTE(review): this is not atomic with respect to concurrent
// Update() calls — a racing update between the global store and the
// per-thread loop may be partially reflected; confirm callers only
// reset at quiescent points.
void ResetPeakValue() override {
  const int64_t current_value = GetCurrentValue();
  peak_value_.store(current_value, std::memory_order_relaxed);

  std::unordered_map<uint64_t, std::reference_wrapper<ThreadLocalStatType>>
      thread_local_stats =
          ThreadDataRegistry<ThreadLocalStatType>::GetInstance()
              .GetAllThreadDataByRef();

  // Iterate by const reference: `for (auto pair : ...)` copied the
  // map entry on every iteration (clang-tidy performance-for-range-copy).
  for (const auto& pair : thread_local_stats) {
    pair.second.get().peak = pair.second.get().current;
  }

  VLOG(8) << "Reset peak_value to current_value = " << current_value;
}

private:
Stat() {}
~Stat() {}
Expand All @@ -128,12 +145,14 @@ int64_t DeviceMemoryStatPeakValue(const std::string& stat_type, int dev_id);
void DeviceMemoryStatUpdate(const std::string& stat_type,
int dev_id,
int64_t increment);
void DeviceMemoryStatResetPeakValue(const std::string& stat_type, int dev_id);

int64_t HostMemoryStatCurrentValue(const std::string& stat_type, int dev_id);
int64_t HostMemoryStatPeakValue(const std::string& stat_type, int dev_id);
void HostMemoryStatUpdate(const std::string& stat_type,
int dev_id,
int64_t increment);
void HostMemoryStatResetPeakValue(const std::string& stat_type, int dev_id);

void LogDeviceMemoryStats(const phi::Place& place, const std::string& op_name);

Expand Down Expand Up @@ -179,6 +198,8 @@ void LogDeviceMemoryStats(const phi::Place& place, const std::string& op_name);
DEVICE_MEMORY_STAT_FUNC(item, id, GetPeakValue)
#define DEVICE_MEMORY_STAT_UPDATE(item, id, increment) \
DEVICE_MEMORY_STAT_FUNC(item, id, Update, increment)
#define DEVICE_MEMORY_STAT_RESET_PEAK_VALUE(item, id) \
DEVICE_MEMORY_STAT_FUNC(item, id, ResetPeakValue)

#define HOST_MEMORY_STAT_FUNC(item, id, func, ...) \
[&] { \
Expand All @@ -199,6 +220,8 @@ void LogDeviceMemoryStats(const phi::Place& place, const std::string& op_name);
HOST_MEMORY_STAT_FUNC(item, id, GetPeakValue)
#define HOST_MEMORY_STAT_UPDATE(item, id, increment) \
HOST_MEMORY_STAT_FUNC(item, id, Update, increment)
#define HOST_MEMORY_STAT_RESET_PEAK_VALUE(item, id) \
HOST_MEMORY_STAT_FUNC(item, id, ResetPeakValue)

#define DEVICE_MEMORY_STAT_DECLARE_WITH_ID(item, id) \
struct DeviceMemoryStat##item##id : public ThreadLocalStatBase {}
Expand Down
140 changes: 140 additions & 0 deletions python/paddle/device/cuda/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@
'get_device_properties',
'get_device_name',
'get_device_capability',
'reset_peak_memory_stats',
'reset_max_memory_allocated',
'reset_max_memory_reserved',
'memory_stats',
]


Expand Down Expand Up @@ -298,6 +302,142 @@ def max_memory_reserved(device: _CudaPlaceLike | None = None) -> int:
return core.device_memory_stat_peak_value("Reserved", device_id)


def reset_peak_memory_stats(device: _CudaPlaceLike | None = None) -> None:
    '''
    Reset the peak values of GPU memory allocated and reserved to the current values.

    The allocator tracks peak statistics for both allocated and reserved
    memory. This call rewinds both peaks so that they equal the current
    usage; subsequent peak queries only reflect activity after this call.

    Args:
        device(paddle.CUDAPlace|int|str|None, optional): The device, the id of the device or
            the string name of device like 'gpu:x'. If device is None, the device is the current device.
            Default: None.

    Examples:
        .. code-block:: python

            >>> # doctest: +REQUIRES(env:GPU)
            >>> import paddle
            >>> paddle.device.set_device('gpu')

            >>> paddle.device.cuda.reset_peak_memory_stats(paddle.CUDAPlace(0))
            >>> paddle.device.cuda.reset_peak_memory_stats(0)
            >>> paddle.device.cuda.reset_peak_memory_stats("gpu:0")
    '''
    api_name = "paddle.device.cuda.reset_peak_memory_stats"
    if not core.is_compiled_with_cuda():
        raise ValueError(
            f"The API {api_name} is not supported in CPU-only PaddlePaddle. Please reinstall PaddlePaddle with GPU support to call this API."
        )
    dev_id = extract_cuda_device_id(device, op_name=api_name)
    # Rewind both per-device peak statistics.
    for stat_type in ("Allocated", "Reserved"):
        core.device_memory_stat_reset_peak_value(stat_type, dev_id)


def reset_max_memory_allocated(device: _CudaPlaceLike | None = None) -> None:
    '''
    Reset the peak size of GPU memory that is allocated to tensor of the given device.

    After this call the peak equals the current allocated size, so
    ``max_memory_allocated`` reflects only allocations made afterwards.

    Note:
        Allocated memory refers to the GPU memory that is currently allocated to tensors.

    Args:
        device(paddle.CUDAPlace|int|str|None, optional): The device, the id of the device or
            the string name of device like 'gpu:x'. If device is None, the device is the current device.
            Default: None.

    Examples:
        .. code-block:: python

            >>> # doctest: +REQUIRES(env:GPU)
            >>> import paddle
            >>> paddle.device.set_device('gpu')

            >>> paddle.device.cuda.reset_max_memory_allocated(paddle.CUDAPlace(0))
            >>> paddle.device.cuda.reset_max_memory_allocated(0)
            >>> paddle.device.cuda.reset_max_memory_allocated("gpu:0")
    '''

    name = "paddle.device.cuda.reset_max_memory_allocated"
    if not core.is_compiled_with_cuda():
        raise ValueError(
            f"The API {name} is not supported in CPU-only PaddlePaddle. Please reinstall PaddlePaddle with GPU support to call this API."
        )
    device_id = extract_cuda_device_id(device, op_name=name)
    core.device_memory_stat_reset_peak_value("Allocated", device_id)


def reset_max_memory_reserved(device: _CudaPlaceLike | None = None) -> None:
    '''
    Reset the peak values of GPU memory reserved to the current values.
    Reserved memory refers to the GPU memory that is held by the allocator of the given device.

    Args:
        device(paddle.CUDAPlace|int|str|None, optional): The device, the id of the device or
            the string name of device like 'gpu:x'. If device is None, the device is the current device.
            Default: None.

    Examples:
        .. code-block:: python

            >>> # doctest: +REQUIRES(env:GPU)
            >>> import paddle
            >>> paddle.device.set_device('gpu')

            >>> paddle.device.cuda.reset_max_memory_reserved(paddle.CUDAPlace(0))
            >>> paddle.device.cuda.reset_max_memory_reserved(0)
            >>> paddle.device.cuda.reset_max_memory_reserved("gpu:0")
    '''

    api_name = "paddle.device.cuda.reset_max_memory_reserved"
    if not core.is_compiled_with_cuda():
        raise ValueError(
            f"The API {api_name} is not supported in CPU-only PaddlePaddle. Please reinstall PaddlePaddle with GPU support to call this API."
        )
    dev_id = extract_cuda_device_id(device, op_name=api_name)
    # Only the "Reserved" peak is rewound; "Allocated" is left untouched.
    core.device_memory_stat_reset_peak_value("Reserved", dev_id)


def memory_stats(device: _CudaPlaceLike | None = None) -> dict:
    '''
    Return the memory stats of the given device.

    Args:
        device(paddle.CUDAPlace|int|str|None, optional): The device, the id of the device or
            the string name of device like 'gpu:x'. If device is None, the device is the current device.
            Default: None.

    Return:
        dict: The current memory stats of the given device, in bytes, under the keys:

            - ``memory.allocated.current``: current size of GPU memory allocated to tensor.
            - ``memory.allocated.peak``: peak size of GPU memory allocated to tensor.
            - ``memory.reserved.current``: current size of GPU memory held by the allocator.
            - ``memory.reserved.peak``: peak size of GPU memory held by the allocator.

    Examples:
        .. code-block:: python

            >>> # doctest: +REQUIRES(env:GPU)
            >>> import paddle
            >>> paddle.device.set_device('gpu')

            >>> memory_stats = paddle.device.cuda.memory_stats(paddle.CUDAPlace(0))
            >>> memory_stats = paddle.device.cuda.memory_stats(0)
            >>> memory_stats = paddle.device.cuda.memory_stats("gpu:0")
    '''

    api_name = "paddle.device.cuda.memory_stats"
    if not core.is_compiled_with_cuda():
        raise ValueError(
            f"The API {api_name} is not supported in CPU-only PaddlePaddle. Please reinstall PaddlePaddle with GPU support to call this API."
        )
    dev_id = extract_cuda_device_id(device, op_name=api_name)
    return core.device_memory_stats(dev_id)


def memory_allocated(device: _CudaPlaceLike | None = None) -> int:
'''
Return the current size of gpu memory that is allocated to tensor of the given device.
Expand Down
30 changes: 27 additions & 3 deletions test/cpp/fluid/memory/stats_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,18 @@ class StatsTest : public ::testing::Test {
void SetFunc(
std::function<void(const std::string, int, int64_t)> update_func,
std::function<int64_t(const std::string, int)> current_value_func,
std::function<int64_t(const std::string, int)> peak_value_func) {
std::function<int64_t(const std::string, int)> peak_value_func,
std::function<void(const std::string, int)> reset_peak_value_func) {
update_func_ = update_func;
current_value_func_ = current_value_func;
peak_value_func_ = peak_value_func;
reset_peak_value_func_ = reset_peak_value_func;
}

// Run the three stat checks in sequence: concurrent read/write,
// peak tracking, and peak reset.
void RunTests() {
MultiThreadReadWriteTest();
PeakValueTest();
ResetPeakValueTest();
}

private:
Expand Down Expand Up @@ -94,6 +97,18 @@ class StatsTest : public ::testing::Test {
EXPECT_EQ(peak_value_func_(stat_type_, 0), peak_value);
}

void ResetPeakValueTest() {
for (int64_t data : datas_) {
update_func_(stat_type_, 0, data);

EXPECT_GE(peak_value_func_(stat_type_, 0),
current_value_func_(stat_type_, 0));
reset_peak_value_func_(stat_type_, 0);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

在这个测试中,这一步如果reset不成功,是不是后面的检查也是会通过的?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

如果reset不成功,下一行(107行)的检查应该无法通过。

  void ResetPeakValueTest() {
    for (int64_t data : datas_) {
      update_func_(stat_type_, 0, data);

      EXPECT_GE(peak_value_func_(stat_type_, 0),
                current_value_func_(stat_type_, 0));
      // reset_peak_value_func_(stat_type_, 0);
      printf("data: %ld, Peak Value: %ld, Current Value: %ld\n",data, peak_value_func_(stat_type_, 0), current_value_func_(stat_type_, 0));
      EXPECT_EQ(peak_value_func_(stat_type_, 0),
                current_value_func_(stat_type_, 0));
    }
  }

如果将reset_peak_value_func_函数注释,测试将无法通过。下面为部分运行结果。

115: Test timeout computed to be: 10000000
115: [==========] Running 4 tests from 1 test case.
115: [----------] Global test environment set-up.
115: [----------] 4 tests from StatsTest
115: [ RUN      ] StatsTest.DeviceAllocatedTest
115: data: 543149808935355, Peak Value: 45703145873829393, Current Value: 45703145873829393
115: data: 634698327471328, Peak Value: 46337844201300721, Current Value: 46337844201300721
115: data: 706215795436611, Peak Value: 47044059996737332, Current Value: 47044059996737332
115: data: 577939367795333, Peak Value: 47621999364532665, Current Value: 47621999364532665
115: data: 419479490054362, Peak Value: 48041478854587027, Current Value: 48041478854587027
115: data: 21975227714595, Peak Value: 48063454082301622, Current Value: 48063454082301622
115: data: 812939817942250, Peak Value: 48876393900243872, Current Value: 48876393900243872
115: data: 984428837942082, Peak Value: 49860822738185954, Current Value: 49860822738185954
115: data: 537304104446806, Peak Value: 50398126842632760, Current Value: 50398126842632760
115: data: 685008544452453, Peak Value: 51083135387085213, Current Value: 51083135387085213
115: data: 563352858161268, Peak Value: 51646488245246481, Current Value: 51646488245246481
115: data: 690143831596330, Peak Value: 52336632076842811, Current Value: 52336632076842811
115: data: 964829938186077, Peak Value: 53301462015028888, Current Value: 53301462015028888
115: data: 476984078018245, Peak Value: 53778446093047133, Current Value: 53778446093047133
115: data: 804403365180177, Peak Value: 54582849458227310, Current Value: 54582849458227310
115: data: -57918691189304, Peak Value: 54582849458227310, Current Value: 54524930767038006
115: /home/aistudio/test/Paddle/test/cpp/fluid/memory/stats_test.cc:109: Failure
115: Expected equality of these values:
115:   peak_value_func_(stat_type_, 0)
115:     Which is: 54582849458227310
115:   current_value_func_(stat_type_, 0)
115:     Which is: 54524930767038006
115: data: 947611269236893, Peak Value: 55472542036274899, Current Value: 55472542036274899
115: data: 752188963801927, Peak Value: 56224731000076826, Current Value: 56224731000076826
115: data: 710946451346683, Peak Value: 56935677451423509, Current Value: 56935677451423509
115: data: -49226452527666, Peak Value: 56935677451423509, Current Value: 56886450998895843
115: /home/aistudio/test/Paddle/test/cpp/fluid/memory/stats_test.cc:109: Failure
115: Expected equality of these values:
115:   peak_value_func_(stat_type_, 0)
115:     Which is: 56935677451423509
115:   current_value_func_(stat_type_, 0)
115:     Which is: 56886450998895843
115: data: -59049377393968, Peak Value: 56935677451423509, Current Value: 56827401621501875
115: /home/aistudio/test/Paddle/test/cpp/fluid/memory/stats_test.cc:109: Failure
115: Expected equality of these values:
115:   peak_value_func_(stat_type_, 0)
115:     Which is: 56935677451423509
115:   current_value_func_(stat_type_, 0)
115:     Which is: 56827401621501875
115: data: 14128239868858, Peak Value: 56935677451423509, Current Value: 56841529861370733
115: /home/aistudio/test/Paddle/test/cpp/fluid/memory/stats_test.cc:109: Failure
115: Expected equality of these values:
115:   peak_value_func_(stat_type_, 0)
115:     Which is: 56935677451423509
115:   current_value_func_(stat_type_, 0)
115:     Which is: 56841529861370733
115: data: 463298869064035, Peak Value: 57304828730434768, Current Value: 57304828730434768

EXPECT_EQ(peak_value_func_(stat_type_, 0),
current_value_func_(stat_type_, 0));
}
}

std::string stat_type_;
std::vector<int64_t> datas_{
543149808935355, 634698327471328, 706215795436611, 577939367795333,
Expand Down Expand Up @@ -125,13 +140,15 @@ class StatsTest : public ::testing::Test {
std::function<void(const std::string, int, int64_t)> update_func_;
std::function<int64_t(const std::string, int)> current_value_func_;
std::function<int64_t(const std::string, int)> peak_value_func_;
std::function<void(const std::string, int)> reset_peak_value_func_;
};

// Exercise the device "Allocated" stat through the direct function
// API (as opposed to the macro API used by the *MacroTest cases).
// NOTE: the scraped diff showed both the pre-change 3-argument
// SetFunc call and the post-change 4-argument one; only the
// post-change version is kept here.
TEST_F(StatsTest, DeviceAllocatedTest) {
  SetStatType("Allocated");
  SetFunc(DeviceMemoryStatUpdate,
          DeviceMemoryStatCurrentValue,
          DeviceMemoryStatPeakValue,
          DeviceMemoryStatResetPeakValue);
  RunTests();
}

Expand All @@ -146,6 +163,9 @@ TEST_F(StatsTest, DeviceReservedMacroTest) {
},
[](const std::string stat_type, int id) {
return DEVICE_MEMORY_STAT_PEAK_VALUE(Reserved, id);
},
[](const std::string stat_type, int id) {
return DEVICE_MEMORY_STAT_RESET_PEAK_VALUE(Reserved, id);
});
RunTests();
}
Expand All @@ -161,6 +181,9 @@ TEST_F(StatsTest, HostAllocatedMacroTest) {
},
[](const std::string stat_type, int id) {
return HOST_MEMORY_STAT_PEAK_VALUE(Allocated, id);
},
[](const std::string stat_type, int id) {
return HOST_MEMORY_STAT_RESET_PEAK_VALUE(Allocated, id);
});
RunTests();
}
Expand All @@ -169,7 +192,8 @@ TEST_F(StatsTest, HostReservedTest) {
SetStatType("Reserved");
SetFunc(HostMemoryStatUpdate,
HostMemoryStatCurrentValue,
HostMemoryStatPeakValue);
HostMemoryStatPeakValue,
HostMemoryStatResetPeakValue);
RunTests();
}

Expand Down
Loading