Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

trt moe #9915

Open
wants to merge 9 commits into
base: develop
Choose a base branch
from
Open

trt moe #9915

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions csrc/gpu/moe/tensorrt-llm-moe/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# NOTE

# tensorrt-llm-moe
Use the TensorRT-LLM MoE implementation as a standalone library.

### Build

```shell
python3 setup_cuda.py install
```

### Test

#### End-to-end MoE test
```shell
python3 test_trtllm_moe.py
```



Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#
# SPDX-FileCopyrightText: Copyright (c) 1993-2022 NVIDIA CORPORATION &
# AFFILIATES. All rights reserved. SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
#
# Gather the C++ and CUDA sources located directly in this directory.
file(GLOB SRCS *.cpp)
file(GLOB CU_SRCS *.cu)

# Build them as an object library (no archive is produced) with
# position-independent code and CUDA device-symbol resolution enabled.
add_library(common_src OBJECT ${SRCS} ${CU_SRCS})
set_target_properties(
  common_src
  PROPERTIES POSITION_INDEPENDENT_CODE ON
             CUDA_RESOLVE_DEVICE_SYMBOLS ON)
32 changes: 32 additions & 0 deletions csrc/gpu/moe/tensorrt-llm-moe/cpp/tensorrt_llm/common/algorithm.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

namespace tensorrt_llm
{

// Base class for algorithms.
// Instances can be default-constructed and moved, but never copied.
struct Algorithm
{
    Algorithm() = default;

    // Copy operations are disabled.
    Algorithm(Algorithm const&) = delete;
    Algorithm& operator=(Algorithm const&) = delete;

    // Move operations use the compiler-generated implementations.
    Algorithm(Algorithm&&) = default;
    Algorithm& operator=(Algorithm&&) = default;
};

} // namespace tensorrt_llm
103 changes: 103 additions & 0 deletions csrc/gpu/moe/tensorrt-llm-moe/cpp/tensorrt_llm/common/arrayView.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
/*
* Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include "tensorrt_llm/common/assert.h"
#include <cstdint>

namespace tensorrt_llm::common
{

//!
//! \brief A very rudimentary implementation of std::span.
//!
//! Holds a pointer and an element count and exposes them through iterator and
//! index access. The view has no destructor and never allocates, so it does not
//! manage the storage it points at.
//!
//! \tparam T Element type of the viewed sequence.
//!
template <typename T>
class ArrayView
{
public:
    using value_type = T;
    using size_type = std::size_t;
    using reference = value_type&;
    using const_reference = value_type const&;
    using pointer = T*;
    using const_pointer = T const*;
    using iterator = pointer;
    using const_iterator = const_pointer;

    //! \brief Creates a view over `size` elements starting at `data`.
    //! \param data Pointer to the first element.
    //! \param size Number of elements covered by the view.
    ArrayView(T* data, size_type size)
        : mData{data}
        , mSize{size}
    {
    }

    //! \brief Mutable iterator to the first element.
    [[nodiscard]] iterator begin()
    {
        return mData;
    }

    //! \brief Mutable iterator one past the last element.
    [[nodiscard]] iterator end()
    {
        return mData + mSize;
    }

    //! \brief Read-only iterator to the first element.
    [[nodiscard]] const_iterator begin() const
    {
        return mData;
    }

    //! \brief Read-only iterator one past the last element.
    [[nodiscard]] const_iterator end() const
    {
        return mData + mSize;
    }

    //! \brief Read-only iterator to the first element.
    [[nodiscard]] const_iterator cbegin() const
    {
        return mData;
    }

    //! \brief Read-only iterator one past the last element.
    [[nodiscard]] const_iterator cend() const
    {
        return mData + mSize;
    }

    //! \brief Number of elements in the view.
    [[nodiscard]] size_type size() const
    {
        return mSize;
    }

    //! \brief Mutable access to the element at `index`.
    //!
    //! The index is validated only when INDEX_RANGE_CHECK is defined at compile time;
    //! otherwise no bounds check is performed.
    [[nodiscard]] reference operator[](size_type index)
    {
#ifdef INDEX_RANGE_CHECK
        // %zu is the printf length modifier for std::size_t; %lu is only correct where
        // unsigned long happens to have the same width (not the case on 64-bit Windows).
        TLLM_CHECK_WITH_INFO(index < mSize, "Index %zu is out of bounds [0, %zu)", index, mSize);
#endif
        return mData[index];
    }

    //! \brief Read-only access to the element at `index`.
    //!
    //! The index is validated only when INDEX_RANGE_CHECK is defined at compile time;
    //! otherwise no bounds check is performed.
    [[nodiscard]] const_reference operator[](size_type index) const
    {
#ifdef INDEX_RANGE_CHECK
        // See the non-const overload for why %zu is used here.
        TLLM_CHECK_WITH_INFO(index < mSize, "Index %zu is out of bounds [0, %zu)", index, mSize);
#endif
        return mData[index];
    }

private:
    T* mData;        //!< First element of the viewed range.
    size_type mSize; //!< Number of elements in the viewed range.
};

} // namespace tensorrt_llm::common
34 changes: 34 additions & 0 deletions csrc/gpu/moe/tensorrt-llm-moe/cpp/tensorrt_llm/common/assert.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
* Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "tensorrt_llm/common/assert.h"

namespace
{

// Reads the TLLM_DEBUG_MODE environment variable and reports whether debug checks are
// requested. Returns true only when the variable is set and its first character is '1'.
bool initCheckDebug()
{
    char const* const envValue = std::getenv("TLLM_DEBUG_MODE");
    if (envValue == nullptr)
    {
        return false;
    }
    return *envValue == '1';
}
} // namespace

// Returns whether debug checks are enabled. initCheckDebug() is evaluated once, on the
// first call, and the result is reused by every later call.
bool DebugConfig::isCheckDebugEnabled()
{
    static auto const sCheckDebugEnabled = initCheckDebug();
    return sCheckDebugEnabled;
}
92 changes: 92 additions & 0 deletions csrc/gpu/moe/tensorrt-llm-moe/cpp/tensorrt_llm/common/assert.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
/*
* Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include "tensorrt_llm/common/stringUtils.h"
#include "tensorrt_llm/common/tllmException.h"

#include <string>

namespace tensorrt_llm::common
{
[[noreturn]] inline void throwRuntimeError(char const* const file, int const line, std::string const& info = "")
{
throw TllmException(file, line, fmtstr("[TensorRT-LLM][ERROR] Assertion failed: %s", info.c_str()));
}

} // namespace tensorrt_llm::common

// Holder for the switch that gates the debug-only checks in this header
// (TLLM_CHECK_DEBUG and TLLM_CHECK_DEBUG_WITH_INFO).
class DebugConfig
{
public:
// Returns true when the debug-only checks should run. Only declared here; the
// definition is provided outside this header.
static bool isCheckDebugEnabled();
};

// Branch-prediction hints. Both macros yield the value of `x`; they only tell the compiler
// which outcome to expect.
//
// On Windows the hints are deliberately no-ops. __assume() must not be used as a stand-in for
// __builtin_expect(): it is a promise that the condition ALWAYS holds, so when `x` has the
// "unexpected" value the program has undefined behaviour and the optimizer may remove that
// branch altogether - which, for TLLM_CHECK, is precisely the branch that throws.
#if defined(_WIN32)
#define TLLM_LIKELY(x) (x)
#define TLLM_UNLIKELY(x) (x)
#else
#define TLLM_LIKELY(x) __builtin_expect((x), 1)
#define TLLM_UNLIKELY(x) __builtin_expect((x), 0)
#endif

// Evaluates `val` once, converted to bool. When it is false, calls
// tensorrt_llm::common::throwRuntimeError with the current file, line and the
// stringified expression; when it is true, does nothing.
#define TLLM_CHECK(val) \
    do \
    { \
        if (!TLLM_LIKELY(static_cast<bool>(val))) \
        { \
            tensorrt_llm::common::throwRuntimeError(__FILE__, __LINE__, #val); \
        } \
    } while (0)

// Same as TLLM_CHECK, but the failure text is built with tensorrt_llm::common::fmtstr
// from the format string `info` and the optional trailing arguments. The message is
// only formatted when `val` is false.
#define TLLM_CHECK_WITH_INFO(val, info, ...) \
    do \
    { \
        if (!TLLM_LIKELY(static_cast<bool>(val))) \
        { \
            tensorrt_llm::common::throwRuntimeError( \
                __FILE__, __LINE__, tensorrt_llm::common::fmtstr(info, ##__VA_ARGS__)); \
        } \
    } while (0)

// Debug-only variant of TLLM_CHECK: `val` is evaluated, and a failure reported through
// tensorrt_llm::common::throwRuntimeError, only when DebugConfig::isCheckDebugEnabled()
// returns true. Otherwise `val` is not evaluated at all.
#define TLLM_CHECK_DEBUG(val) \
    do \
    { \
        if (TLLM_UNLIKELY(DebugConfig::isCheckDebugEnabled())) \
        { \
            if (!TLLM_LIKELY(static_cast<bool>(val))) \
            { \
                tensorrt_llm::common::throwRuntimeError(__FILE__, __LINE__, #val); \
            } \
        } \
    } while (0)

// Debug-only variant of TLLM_CHECK_WITH_INFO: `val` is evaluated, and the formatted
// failure message produced, only when DebugConfig::isCheckDebugEnabled() returns true.
// Otherwise neither `val` nor the format arguments are evaluated.
#define TLLM_CHECK_DEBUG_WITH_INFO(val, info, ...) \
    do \
    { \
        if (TLLM_UNLIKELY(DebugConfig::isCheckDebugEnabled())) \
        { \
            if (!TLLM_LIKELY(static_cast<bool>(val))) \
            { \
                tensorrt_llm::common::throwRuntimeError( \
                    __FILE__, __LINE__, tensorrt_llm::common::fmtstr(info, ##__VA_ARGS__)); \
            } \
        } \
    } while (0)

// Throws the object produced by NEW_TLLM_EXCEPTION(...), forwarding every macro argument
// unchanged. NEW_TLLM_EXCEPTION is not defined in this header - presumably it comes from
// tllmException.h; confirm there for the accepted arguments.
// The do/while(0) wrapper lets the macro be used as a single statement.
#define TLLM_THROW(...) \
do \
{ \
throw NEW_TLLM_EXCEPTION(__VA_ARGS__); \
} while (0)

// Builds (but does not throw) a NEW_TLLM_EXCEPTION whose message has the form
// "<type>: <what>", where <type> is the result of TllmException::demangle applied to
// typeid(ex).name() and <what> is ex.what().
// NOTE(review): `ex` is expanded twice and is not parenthesized, so pass a plain
// variable name rather than an expression with side effects.
#define TLLM_WRAP(ex) \
NEW_TLLM_EXCEPTION("%s: %s", tensorrt_llm::common::TllmException::demangle(typeid(ex).name()).c_str(), ex.what())
Loading
Loading