PaddlePaddle · gzy19990617 · Feb 20, 2025 · Feb 20, 2025 · Feb 20, 2025 · Feb 20, 2025
diff --git a/csrc/gpu/moe/tensorrt-llm-moe/README.md b/csrc/gpu/moe/tensorrt-llm-moe/README.md
@@ -0,0 +1,20 @@
+# NOTE
+
+# tensorrt-llm-moe
+Use the TensorRT-LLM MoE (Mixture-of-Experts) implementation as a standalone library.
+
+### Build
+
+```shell
+python3 setup_cuda.py install
+```
+
+### Test
+
+#### End-to-end MoE test
+```shell
+python3 test_trtllm_moe.py
+```
+
+
+
diff --git a/csrc/gpu/moe/tensorrt-llm-moe/cpp/tensorrt_llm/common/CMakeLists.txt b/csrc/gpu/moe/tensorrt-llm-moe/cpp/tensorrt_llm/common/CMakeLists.txt
@@ -0,0 +1,22 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2022 NVIDIA CORPORATION &
+# AFFILIATES. All rights reserved. SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+#
+file(GLOB SRCS *.cpp) # Collect the C++ sources that sit next to this CMakeLists.txt.
+file(GLOB CU_SRCS *.cu) # Collect the CUDA sources that sit next to this CMakeLists.txt.
+
+add_library(common_src OBJECT ${SRCS} ${CU_SRCS}) # OBJECT library: the sources are compiled but not archived or linked here.
+set_property(TARGET common_src PROPERTY POSITION_INDEPENDENT_CODE ON) # Compile the objects as position-independent code.
+set_property(TARGET common_src PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON) # Ask CMake to resolve CUDA device symbols for this target.
diff --git a/csrc/gpu/moe/tensorrt-llm-moe/cpp/tensorrt_llm/common/algorithm.h b/csrc/gpu/moe/tensorrt-llm-moe/cpp/tensorrt_llm/common/algorithm.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+namespace tensorrt_llm
+{
+
+// Base class for algorithms: default-constructible and movable, but not copyable.
+struct Algorithm
+{
+    Algorithm() = default;
+    Algorithm(Algorithm&&) = default; // move construction is allowed
+    Algorithm& operator=(Algorithm&&) = default; // move assignment is allowed
+    Algorithm(Algorithm const&) = delete; // copy construction is disabled
+    Algorithm& operator=(Algorithm const&) = delete; // copy assignment is disabled
+};
+
+} // namespace tensorrt_llm
diff --git a/csrc/gpu/moe/tensorrt-llm-moe/cpp/tensorrt_llm/common/arrayView.h b/csrc/gpu/moe/tensorrt-llm-moe/cpp/tensorrt_llm/common/arrayView.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2021-2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "tensorrt_llm/common/assert.h"
+#include <cstdint>
+
+namespace tensorrt_llm::common
+{
+
+//!
+//! \brief A very rudimentary implementation of std::span: a view over `size` contiguous elements starting at `data`; it never allocates or frees.
+//!
+template <typename T>
+class ArrayView
+{
+public:
+    using value_type = T;
+    using size_type = std::size_t; // NOTE(review): std::size_t is declared in <cstddef>; this header only includes <cstdint> directly.
+    using reference = value_type&;
+    using const_reference = value_type const&;
+    using pointer = T*;
+    using const_pointer = T const*;
+    using iterator = pointer; // iterators are plain pointers into the viewed range
+    using const_iterator = const_pointer;
+
+    ArrayView(T* data, size_type size) // Stores the pointer and element count as given; nothing is copied or validated.
+        : mData{data}
+        , mSize{size}
+    {
+    }
+
+    [[nodiscard]] iterator begin() // pointer to the first element
+    {
+        return mData;
+    }
+
+    [[nodiscard]] iterator end() // pointer one past the last element
+    {
+        return mData + mSize;
+    }
+
+    [[nodiscard]] const_iterator begin() const // read-only counterpart of begin()
+    {
+        return mData;
+    }
+
+    [[nodiscard]] const_iterator end() const // read-only counterpart of end()
+    {
+        return mData + mSize;
+    }
+
+    [[nodiscard]] const_iterator cbegin() const // always read-only, even on a non-const view
+    {
+        return mData;
+    }
+
+    [[nodiscard]] const_iterator cend() const // always read-only, even on a non-const view
+    {
+        return mData + mSize;
+    }
+
+    [[nodiscard]] size_type size() const // number of elements in the view
+    {
+        return mSize;
+    }
+
+    [[nodiscard]] reference operator[](size_type index) // Unchecked element access unless INDEX_RANGE_CHECK is defined.
+    {
+#ifdef INDEX_RANGE_CHECK // opt-in bounds check; without it an out-of-range index is not detected
+        TLLM_CHECK_WITH_INFO(index < mSize, "Index %zu is out of bounds [0, %zu)", index, mSize); // %zu matches std::size_t on every data model (assumes a printf-style formatter)
+#endif
+        return mData[index];
+    }
+
+    [[nodiscard]] const_reference operator[](size_type index) const // Read-only counterpart of operator[].
+    {
+#ifdef INDEX_RANGE_CHECK // opt-in bounds check; without it an out-of-range index is not detected
+        TLLM_CHECK_WITH_INFO(index < mSize, "Index %zu is out of bounds [0, %zu)", index, mSize); // %zu matches std::size_t on every data model (assumes a printf-style formatter)
+#endif
+        return mData[index];
+    }
+
+private:
+    T* mData; // first element of the viewed range (not freed by this class)
+    size_type mSize; // element count of the viewed range
+};
+
+} // namespace tensorrt_llm::common
diff --git a/csrc/gpu/moe/tensorrt-llm-moe/cpp/tensorrt_llm/common/assert.cpp b/csrc/gpu/moe/tensorrt-llm-moe/cpp/tensorrt_llm/common/assert.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2022-2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tensorrt_llm/common/assert.h"
+
+namespace
+{
+
+bool initCheckDebug() // Reports whether the TLLM_DEBUG_MODE environment variable is set to a value starting with '1'.
+{
+    auto constexpr kDebugEnabled = "TLLM_DEBUG_MODE"; // name of the environment variable that is consulted
+    auto const debugEnabled = std::getenv(kDebugEnabled); // null when unset. NOTE(review): std::getenv lives in <cstdlib>, which this file does not include directly.
+    return debugEnabled && debugEnabled[0] == '1'; // only the first character is inspected, so "1" and "10" both enable it
+}
+} // namespace
+
+bool DebugConfig::isCheckDebugEnabled() // Returns the result of initCheckDebug(), computed once and then reused.
+{
+    static bool const debugEnabled = initCheckDebug(); // function-local static: evaluated on the first call only, so later environment changes are not seen
+    return debugEnabled;
+}
diff --git a/csrc/gpu/moe/tensorrt-llm-moe/cpp/tensorrt_llm/common/assert.h b/csrc/gpu/moe/tensorrt-llm-moe/cpp/tensorrt_llm/common/assert.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2022-2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "tensorrt_llm/common/stringUtils.h"
+#include "tensorrt_llm/common/tllmException.h"
+
+#include <string>
+
+namespace tensorrt_llm::common
+{
+[[noreturn]] inline void throwRuntimeError(char const* const file, int const line, std::string const& info = "") // Never returns: always throws a TllmException built from file, line and info.
+{
+    throw TllmException(file, line, fmtstr("[TensorRT-LLM][ERROR] Assertion failed: %s", info.c_str())); // info is passed as an argument, not as the format string (fmtstr is defined elsewhere; presumably printf-style)
+}
+
+} // namespace tensorrt_llm::common
+
+class DebugConfig // Holds the runtime switch consulted by TLLM_CHECK_DEBUG and TLLM_CHECK_DEBUG_WITH_INFO.
+{
+public:
+    static bool isCheckDebugEnabled(); // only declared here; the definition is not part of this header
+};
+
+#if defined(_WIN32) // Windows builds use __assume; all other builds use __builtin_expect.
+#define TLLM_LIKELY(x) (__assume((x) == 1), (x)) // NOTE(review): MSVC documents __assume as a guarantee to the optimizer, not a hint - confirm it cannot discard the failure branch of the TLLM_CHECK macros.
+#define TLLM_UNLIKELY(x) (__assume((x) == 0), (x)) // the comma expression still yields x as the macro's value
+#else
+#define TLLM_LIKELY(x) __builtin_expect((x), 1) // yields x while telling the compiler that 1 is the expected value
+#define TLLM_UNLIKELY(x) __builtin_expect((x), 0) // yields x while telling the compiler that 0 is the expected value
+#endif
+// TLLM_CHECK(val): if val converts to false, calls throwRuntimeError with this file, this line and the stringified expression.
+#define TLLM_CHECK(val)                                                                                                \
+    do                                                                                                                 \
+    {                                                                                                                  \
+        TLLM_LIKELY(static_cast<bool>(val)) ? ((void) 0)                                                               \
+                                            : tensorrt_llm::common::throwRuntimeError(__FILE__, __LINE__, #val);       \
+    } while (0)
+// TLLM_CHECK_WITH_INFO(val, info, ...): like TLLM_CHECK, but the message is fmtstr(info, ...) and is only built when the check fails.
+#define TLLM_CHECK_WITH_INFO(val, info, ...)                                                                           \
+    do                                                                                                                 \
+    {                                                                                                                  \
+        TLLM_LIKELY(static_cast<bool>(val))                                                                            \
+        ? ((void) 0)                                                                                                   \
+        : tensorrt_llm::common::throwRuntimeError(                                                                     \
+            __FILE__, __LINE__, tensorrt_llm::common::fmtstr(info, ##__VA_ARGS__));                                    \
+    } while (0)
+// TLLM_CHECK_DEBUG(val): same check as TLLM_CHECK, but val is only evaluated when DebugConfig::isCheckDebugEnabled() returns true.
+#define TLLM_CHECK_DEBUG(val)                                                                                          \
+    do                                                                                                                 \
+    {                                                                                                                  \
+        if (TLLM_UNLIKELY(DebugConfig::isCheckDebugEnabled()))                                                         \
+        {                                                                                                              \
+            TLLM_LIKELY(static_cast<bool>(val)) ? ((void) 0)                                                           \
+                                                : tensorrt_llm::common::throwRuntimeError(__FILE__, __LINE__, #val);   \
+        }                                                                                                              \
+    } while (0)
+// TLLM_CHECK_DEBUG_WITH_INFO(val, info, ...): same as TLLM_CHECK_WITH_INFO, but only active when DebugConfig::isCheckDebugEnabled() returns true.
+#define TLLM_CHECK_DEBUG_WITH_INFO(val, info, ...)                                                                     \
+    do                                                                                                                 \
+    {                                                                                                                  \
+        if (TLLM_UNLIKELY(DebugConfig::isCheckDebugEnabled()))                                                         \
+        {                                                                                                              \
+            TLLM_LIKELY(static_cast<bool>(val))                                                                        \
+            ? ((void) 0)                                                                                               \
+            : tensorrt_llm::common::throwRuntimeError(                                                                 \
+                __FILE__, __LINE__, tensorrt_llm::common::fmtstr(info, ##__VA_ARGS__));                                \
+        }                                                                                                              \
+    } while (0)
+// TLLM_THROW(...): unconditionally throws NEW_TLLM_EXCEPTION(...); that macro is not defined in this header.
+#define TLLM_THROW(...)                                                                                                \
+    do                                                                                                                 \
+    {                                                                                                                  \
+        throw NEW_TLLM_EXCEPTION(__VA_ARGS__);                                                                         \
+    } while (0)
+// TLLM_WRAP(ex): builds NEW_TLLM_EXCEPTION("<demangled typeid name of ex>: <ex.what()>"); ex is parenthesized so any expression can be passed.
+#define TLLM_WRAP(ex)                                                                                                  \
+    NEW_TLLM_EXCEPTION("%s: %s", tensorrt_llm::common::TllmException::demangle(typeid(ex).name()).c_str(), (ex).what())