-
Notifications
You must be signed in to change notification settings - Fork 43
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
21 changed files
with
500 additions
and
165 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
// Copyright (c) Microsoft Corporation. | ||
// Licensed under the MIT license. | ||
|
||
#ifndef NPKIT_H_ | ||
#define NPKIT_H_ | ||
|
||
#include <mscclpp/device.hpp> | ||
#include <mscclpp/gpu_utils.hpp> | ||
#include <mscclpp/npkit/npkit_event.hpp> | ||
#include <mscclpp/npkit/npkit_struct.hpp> | ||
#include <string> | ||
#include <thread> | ||
#include <vector> | ||
|
||
// Name of the device timestamp function NPKit should use: wall_clock64 when | ||
// __HIP_PLATFORM_AMD__ is defined, clock64 otherwise. | ||
#if defined(__HIP_PLATFORM_AMD__) | ||
#define NPKIT_GET_GPU_TIMESTAMP wall_clock64 | ||
#else | ||
#define NPKIT_GET_GPU_TIMESTAMP clock64 | ||
#endif | ||
|
||
// Upper bound on the staging-buffer cursor checked by NpKit::CollectGpuEventShm | ||
// before it records an event. Per the name, presumably the capacity of a | ||
// shared-memory staging buffer - TODO confirm against the kernels that allocate it. | ||
#define NPKIT_SHM_NUM_EVENTS 64 | ||
|
||
// NpKit: an all-static event-collection facade. Host-side entry points are only | ||
// declared here (their definitions are not part of this header); the two | ||
// device-side helpers are defined inline and are compiled only when | ||
// MSCCLPP_DEVICE_COMPILE is defined. | ||
class NpKit { | ||
public: | ||
// Number of GPU event buffers; matches the 1024 factor in the sizing comment on | ||
// kMaxNumGpuEventsPerBuffer below. | ||
static const uint64_t kNumGpuEventBuffers = 1024; | ||
|
||
// Number of CPU event buffers; matches the 64 factor in the sizing comment on | ||
// kMaxNumCpuEventsPerBuffer below. | ||
static const uint64_t kNumCpuEventBuffers = 64; | ||
|
||
// Initialization entry point taking the caller's rank. Defined out of line. | ||
static void Init(int rank); | ||
|
||
// Dump entry point taking an output directory path. Defined out of line; | ||
// presumably writes the collected events under dump_dir - confirm in the definition. | ||
static void Dump(const std::string& dump_dir); | ||
|
||
// Teardown entry point. Defined out of line. | ||
static void Shutdown(); | ||
|
||
// Returns a pointer to NpKitEventCollectContext. Presumably the array that | ||
// StoreGpuEventShm indexes by blockIdx.x - confirm against the definition. | ||
static NpKitEventCollectContext* GetGpuEventCollectContexts(); | ||
|
||
// Device-only helper: record one event into a caller-provided staging buffer. | ||
// type, size, rsvd, timestamp: copied verbatim into the event's fields. | ||
// event_buffer: destination array, indexed by *event_buffer_head. | ||
// event_buffer_head: in/out cursor, advanced by one per recorded event. | ||
// Once *event_buffer_head reaches NPKIT_SHM_NUM_EVENTS the call does nothing, | ||
// so further events are dropped without any error indication. | ||
#if defined(MSCCLPP_DEVICE_COMPILE) | ||
static MSCCLPP_DEVICE_INLINE void CollectGpuEventShm(uint8_t type, uint32_t size, uint32_t rsvd, uint64_t timestamp, | ||
NpKitEvent* event_buffer, uint64_t* event_buffer_head) { | ||
if (*event_buffer_head < NPKIT_SHM_NUM_EVENTS) { | ||
// Only the thread with threadIdx.x == 0 writes the event entry. | ||
if (threadIdx.x == 0) { | ||
NpKitEvent& event = event_buffer[*event_buffer_head]; | ||
event.fields.type = type; | ||
event.fields.size = size; | ||
event.fields.rsvd = rsvd; | ||
event.fields.timestamp = timestamp; | ||
} | ||
// Every calling thread advances the cursor it was handed, not just thread 0. | ||
// NOTE(review): presumably each thread holds its own copy of the cursor so | ||
// all copies stay in step - confirm at the call sites. | ||
(*event_buffer_head)++; | ||
} | ||
} | ||
|
||
// Device-only helper: append the first event_buffer_head staged events from | ||
// event_buffer to the global buffer of the context at index blockIdx.x, then | ||
// advance that context's event_buffer_head by the same count. | ||
// NOTE(review): nothing here checks the destination's capacity - confirm that | ||
// callers cannot overrun the global buffer. | ||
static MSCCLPP_DEVICE_INLINE void StoreGpuEventShm(NpKitEventCollectContext* npKitEventCollectContexts, | ||
NpKitEvent* event_buffer, uint64_t event_buffer_head) { | ||
// Synchronize before copying: __synclds() when MSCCLPP_DEVICE_HIP is defined, | ||
// __syncthreads() otherwise. | ||
#if defined(MSCCLPP_DEVICE_HIP) | ||
__synclds(); | ||
#else // !defined(MSCCLPP_DEVICE_HIP) | ||
__syncthreads(); | ||
#endif // !defined(MSCCLPP_DEVICE_HIP) | ||
NpKitEventCollectContext* npKitCtx = npKitEventCollectContexts + blockIdx.x; | ||
NpKitEvent* global_event_buffer = npKitCtx->event_buffer; | ||
// Each thread takes its own snapshot of the destination offset. | ||
uint64_t global_event_buffer_head = npKitCtx->event_buffer_head; | ||
// Copy in int4-sized chunks: thread t handles chunks t, t + blockDim.x, ... | ||
// The bound is the staged byte count divided by sizeof(int4) (integer division). | ||
// NOTE(review): assumes both buffers are aligned for int4 access and that the | ||
// staged byte count is a multiple of sizeof(int4) - confirm against NpKitEvent. | ||
for (size_t i = threadIdx.x; i < event_buffer_head * sizeof(NpKitEvent) / sizeof(int4); i += blockDim.x) { | ||
((int4*)(global_event_buffer + global_event_buffer_head))[i] = ((int4*)event_buffer)[i]; | ||
} | ||
// Thread 0 alone publishes the new head. | ||
// NOTE(review): there is no barrier between the head snapshot above and this | ||
// update; a thread that takes its snapshot after thread 0 has written here | ||
// would copy to a shifted offset - confirm whether an extra barrier is needed. | ||
if (threadIdx.x == 0) { | ||
npKitCtx->event_buffer_head += event_buffer_head; | ||
} | ||
} | ||
#endif | ||
|
||
// Host-side entry point for recording a CPU event; takes the same event fields | ||
// as CollectGpuEventShm plus a channel id. Defined out of line. | ||
static void CollectCpuEvent(uint8_t type, uint32_t size, uint32_t rsvd, uint64_t timestamp, int channel_id); | ||
|
||
// Returns a pointer to a 64-bit timestamp. Presumably the value held by | ||
// cpu_timestamp_ - confirm against the definition. | ||
static uint64_t* GetCpuTimestamp(); | ||
|
||
private: | ||
// Presumably the body run by cpu_timestamp_update_thread_ - confirm in the definition. | ||
static void CpuTimestampUpdateThread(); | ||
|
||
// 64K * 1024 * 16B = 1GB per GPU | ||
static const uint64_t kMaxNumGpuEventsPerBuffer = 1ULL << 16; | ||
|
||
// 64K * 2 (send/recv) * (1024/64) = 2M, 2M * 64 * 16B = 2GB per CPU | ||
static const uint64_t kMaxNumCpuEventsPerBuffer = 1ULL << 21; | ||
|
||
// Owning handles for the GPU-side and CPU-side event buffers. | ||
static std::vector<mscclpp::UniqueCudaPtr<NpKitEvent>> gpu_event_buffers_; | ||
static std::vector<std::unique_ptr<NpKitEvent[]>> cpu_event_buffers_; | ||
|
||
// Owning handles for the GPU-side and CPU-side collect contexts. | ||
static mscclpp::UniqueCudaPtr<NpKitEventCollectContext> gpu_collect_contexts_; | ||
static std::unique_ptr<NpKitEventCollectContext[]> cpu_collect_contexts_; | ||
|
||
// Rank value; presumably the one passed to Init - confirm in the definition. | ||
static uint64_t rank_; | ||
|
||
// State for the CPU timestamp and the thread that presumably refreshes it. | ||
// NOTE(review): volatile does not provide inter-thread synchronization in C++; | ||
// if the stop flag is written by one thread and read by another, | ||
// std::atomic<bool> would be the safe choice - confirm usage in the definition. | ||
static mscclpp::UniqueCudaHostPtr<uint64_t> cpu_timestamp_; | ||
static std::unique_ptr<std::thread> cpu_timestamp_update_thread_; | ||
static volatile bool cpu_timestamp_update_thread_should_stop_; | ||
}; | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
// Copyright (c) Microsoft Corporation. | ||
// Licensed under the MIT license. | ||
|
||
#ifndef NPKIT_EVENT_H_ | ||
#define NPKIT_EVENT_H_ | ||
|
||
// NPKit event type codes. 0x0 is the invalid code. | ||
#define NPKIT_EVENT_INVALID 0x0 | ||
|
||
// Time synchronization events, one code for the GPU and one for the CPU. | ||
#define NPKIT_EVENT_TIME_SYNC_GPU 0x1 | ||
#define NPKIT_EVENT_TIME_SYNC_CPU 0x2 | ||
|
||
// Executor initialization entry/exit pair. | ||
#define NPKIT_EVENT_EXECUTOR_INIT_ENTRY 0x3 | ||
#define NPKIT_EVENT_EXECUTOR_INIT_EXIT 0x4 | ||
|
||
// Base codes for executor operation entry/exit events. The two bases are 0x10 | ||
// apart; presumably a per-operation offset below 0x10 is added to each base - | ||
// TODO confirm against the call sites. | ||
#define NPKIT_EVENT_EXECUTOR_OP_BASE_ENTRY 0x5 | ||
#define NPKIT_EVENT_EXECUTOR_OP_BASE_EXIT 0x15 | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,4 +25,4 @@ struct NpKitEventCollectContext { | |
|
||
#pragma pack(pop) | ||
|
||
#endif | ||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,6 +25,7 @@ | |
PacketType, | ||
version, | ||
is_nvls_supported, | ||
npkit, | ||
) | ||
|
||
__version__ = version() | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
// Copyright (c) Microsoft Corporation. | ||
// Licensed under the MIT license. | ||
|
||
#include <nanobind/nanobind.h> | ||
#include <nanobind/stl/string.h> | ||
|
||
#include <mscclpp/npkit/npkit.hpp> | ||
|
||
namespace nb = nanobind; | ||
|
||
// Creates the "npkit" submodule on m (docstring "NPKit functions") and binds | ||
// NpKit::Init, NpKit::Dump and NpKit::Shutdown to it under the names | ||
// init, dump and shutdown. | ||
void register_npkit(nb::module_ &m) { | ||
nb::module_ sub_m = m.def_submodule("npkit", "NPKit functions"); | ||
sub_m.def("init", &NpKit::Init); | ||
sub_m.def("dump", &NpKit::Dump); | ||
sub_m.def("shutdown", &NpKit::Shutdown); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.