Commit 20efa23

Revert "[Disco][3rdparty] Add latency optimized all reduce kernels."
This reverts commit ee5c994.
1 parent ee5c994 · commit 20efa23

File tree

5 files changed, +0 -23 lines changed


.gitmodules

Lines changed: 0 additions & 3 deletions
@@ -31,6 +31,3 @@
 [submodule "3rdparty/flashinfer"]
 	path = 3rdparty/flashinfer
 	url = https://github.com/flashinfer-ai/flashinfer.git
-[submodule "3rdparty/trt-llm-allreduce"]
-	path = 3rdparty/trt-llm-allreduce
-	url = git@github.com:csullivan/trt-llm-allreduce.git

3rdparty/trt-llm-allreduce

Lines changed: 0 additions & 1 deletion
This file was deleted.

CMakeLists.txt

Lines changed: 0 additions & 3 deletions
@@ -908,9 +908,6 @@ if(USE_CUDA AND USE_NCCL)
   find_library(LIBRT rt)
   target_link_libraries(tvm PRIVATE nccl ${LIBRT})
   target_link_libraries(tvm_runtime PRIVATE nccl ${LIBRT})
-  install(TARGETS trtllm_allreduce EXPORT ${PROJECT_NAME}Targets DESTINATION lib${LIB_SUFFIX})
-  target_link_libraries(tvm PRIVATE -Wl,--no-as-needed trtllm_allreduce)
-  target_link_libraries(tvm_runtime PRIVATE -Wl,--no-as-needed trtllm_allreduce)
 endif()
 
 if(USE_ROCM AND USE_RCCL)
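
A note on the removed linker flags: -Wl,--no-as-needed forces the linker to keep trtllm_allreduce as a dependency even though tvm and tvm_runtime reference none of its symbols directly. That is the usual arrangement when a library contributes only load-time static registrations (an assumption here, though it matches how TVM's global function registry works). A toy, self-contained C++ illustration of the pattern such a flag protects:

// Illustration only: why a library linked with -Wl,--no-as-needed must not
// be dropped. Its only "use" is a static initializer that runs at load time.
#include <functional>
#include <iostream>
#include <map>
#include <string>

// A tiny global registry, standing in for TVM's PackedFunc registry.
std::map<std::string, std::function<void()>>& Registry() {
  static std::map<std::string, std::function<void()>> r;
  return r;
}

struct Registrar {
  Registrar(const std::string& name, std::function<void()> fn) {
    Registry()[name] = std::move(fn);
  }
};

// In the shared library: no symbol here is referenced directly by the main
// binary, so an --as-needed link would discard the library entirely.
static Registrar reg("custom_allreduce", [] {
  std::cout << "custom all-reduce kernel invoked\n";
});

int main() {
  Registry()["custom_allreduce"]();  // lookup by name, not by symbol
}

With --as-needed semantics the library would be dropped from the dynamic dependencies, its initializer would never run, and the runtime lookup by name would fail.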

cmake/modules/CUDA.cmake

Lines changed: 0 additions & 4 deletions
@@ -47,10 +47,6 @@ if(USE_CUDA)
     set(CMAKE_CUDA_ARCHITECTURES native)
   endif()
 
-  if(USE_CUDA AND USE_NCCL)
-    add_subdirectory(${PROJECT_SOURCE_DIR}/3rdparty/trt-llm-allreduce)
-  endif()
-
   if(USE_CUDNN)
     message(STATUS "Build with cuDNN support")
     include_directories(SYSTEM ${CUDA_CUDNN_INCLUDE_DIRS})

src/runtime/disco/nccl/nccl.cc

Lines changed: 0 additions & 12 deletions
@@ -24,7 +24,6 @@
 #include <tvm/runtime/registry.h>
 
 #include <cstring>
-#include <memory>
 #include <mutex>
 #include <sstream>
 #include <vector>
@@ -39,7 +38,6 @@
 #if TVM_NCCL_RCCL_SWITCH == 0
 #include <nccl.h>
 
-#include "../../../../3rdparty/trt-llm-allreduce/include/cuda_allreduce.h"
 #include "../../cuda/cuda_common.h"
 #else
 #include <rccl/rccl.h>
@@ -142,7 +140,6 @@ struct CCLThreadLocalContext {
   int device_id;
   deviceStream_t default_stream = nullptr;
   ncclComm_t comm;
-  std::unique_ptr<CustomAllReduce> custom_allreduce;
 
   void Clear() {
     NCCL_CALL(ncclCommDestroy(comm));
@@ -193,8 +190,6 @@ void InitCCLPerWorker(IntTuple device_ids, std::string unique_id_bytes) {
   worker->ccl = TVM_DISCO_CCL_NAME;
   ctx->worker = worker;
   ctx->device_id = device_id;
-  ctx->custom_allreduce =
-      std::make_unique<CustomAllReduce>(worker->num_workers, worker->worker_id, ctx->comm);
   // Initialize the communicator
   ncclUniqueId id;
   std::memcpy(id.internal, unique_id_bytes.data(), NCCL_UNIQUE_ID_BYTES);
@@ -206,13 +201,6 @@ void AllReduce(NDArray send, ReduceKind reduce_kind, NDArray recv) {
   ShapeTuple shape = send.Shape();
   int64_t numel = shape->Product();
   deviceStream_t stream = ctx->GetDefaultStream();
-  // TODO(csullivan) make this work
-  // 1. pass type in
-  // 2. src and dest args
-  // 3. some strategy selection outside, if (!enqueue) do nccl?
-  // 4. reduce kind
-  // 5. pass stream in to custom api
-  // ctx->custom_allreduce->enqueue(send->data, numel);
   NCCL_CALL(ncclAllReduce(send->data, recv->data, numel,
                           /*datatype=*/AsNCCLDataType(DataType(send->dtype)),
                           /*op=*/AsNCCLRedOp(reduce_kind), ctx->comm, stream));
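
For context on what this revert removes: the integration never progressed past the TODO above. The deleted enqueue(send->data, numel) call carried no dtype, no destination buffer, no reduce kind, and no stream, which is exactly what the TODO items enumerate. A minimal sketch of the dispatch those items describe might look like the following. This is an assumption, not the reverted implementation: the real CustomAllReduce header lived in the deleted csullivan/trt-llm-allreduce submodule, and the Enqueue signature and kMaxCustomElems threshold here are hypothetical.

// Hypothetical sketch only: the real cuda_allreduce.h was deleted with the
// trt-llm-allreduce submodule, so this interface is an assumption pieced
// together from the constructor call and TODO list visible in the diff.
#include <cstdint>

#include <nccl.h>  // also pulls in cuda_runtime.h for cudaStream_t

// Assumed shape of the removed CustomAllReduce, extended per the TODOs
// (dtype, src/dst pointers, reduce kind, and an explicit stream).
class CustomAllReduce {
 public:
  CustomAllReduce(int num_workers, int worker_id, ncclComm_t comm)
      : num_workers_(num_workers), worker_id_(worker_id), comm_(comm) {}

  // Returns false when the latency-optimized kernel cannot serve this
  // configuration, telling the caller to fall back to ncclAllReduce.
  bool Enqueue(const void* send, void* recv, int64_t numel,
               ncclDataType_t dtype, ncclRedOp_t op, cudaStream_t stream) {
    constexpr int64_t kMaxCustomElems = 1 << 20;  // assumed size threshold
    if (numel > kMaxCustomElems) return false;    // large: NCCL wins anyway
    // ... launch the custom all-reduce kernel on `stream` here ...
    return true;
  }

 private:
  int num_workers_;
  int worker_id_;
  ncclComm_t comm_;
};

// Dispatch in the spirit of TODO item 3: try the custom kernel first,
// and only fall back to NCCL's general-purpose algorithms if it declines.
void AllReduceWithFallback(CustomAllReduce* custom, const void* send,
                           void* recv, int64_t numel, ncclDataType_t dtype,
                           ncclRedOp_t op, ncclComm_t comm,
                           cudaStream_t stream) {
  if (custom == nullptr ||
      !custom->Enqueue(send, recv, numel, dtype, op, stream)) {
    ncclAllReduce(send, recv, numel, dtype, op, comm, stream);
  }
}

Falling back to ncclAllReduce whenever the custom kernel declines keeps every configuration correct, while reserving the latency-optimized path for the small-message regime where fused kernels beat NCCL's ring and tree algorithms.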
