diff --git a/.clang-format b/.clang-format new file mode 100644 index 000000000..964712a78 --- /dev/null +++ b/.clang-format @@ -0,0 +1,8 @@ +--- +BasedOnStyle: LLVM +UseTab: Never +IndentWidth: 2 +ColumnLimit: 80 + +Language: Cpp +Standard: c++17 diff --git a/.editorconfig b/.editorconfig index 10ac9729a..a9e8a6df4 100644 --- a/.editorconfig +++ b/.editorconfig @@ -14,7 +14,10 @@ insert_final_newline = true indent_size = 4 [*.{cpp,hpp,cxx,cc,c,h,cu,cuh}] -indent_size = 4 +indent_size = 2 + +[{*.cmake,CMakeLists.txt}] +indent_size = 2 [*.{yaml,yml}] indent_size = 2 diff --git a/.gitattributes b/.gitattributes index 2f6d49472..bbb14db37 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,10 @@ +* text eol=lf +*.bat eol=crlf + +*.svg binary +*.jpg binary +*.jpeg binary +*.png binary +*.gif binary + *.h linguist-language=C++ diff --git a/.gitignore b/.gitignore index 5bcb6f773..eb96b1622 100644 --- a/.gitignore +++ b/.gitignore @@ -26,7 +26,14 @@ nnfusion.tar.gz # makeenv and test intermediate files tmp/ +.env +.envrc +.venv +env/ venv/ +ENV/ +env.bak/ +venv.bak/ .vscode/ .vs/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..2846e58ef --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,60 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +ci: + autofix_prs: true + autofix_commit_msg: "[Lint]: [pre-commit.ci] auto fixes [...]" + autoupdate_commit_msg: "[CI] [pre-commit.ci] autoupdate" + autoupdate_schedule: monthly +default_stages: [pre-commit, pre-push, manual] +exclude: '^(build|3rdparty)/.*$' # exclude build and 3rdparty directories +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v6.0.0 + hooks: + - id: check-symlinks + - id: destroyed-symlinks + # FIXME: enable these hooks + # - id: trailing-whitespace + # - id: end-of-file-fixer + - id: check-added-large-files + - id: check-merge-conflict + fail_fast: true + # FIXME: 
enable these hooks + # - id: check-executables-have-shebangs + # - id: check-shebang-scripts-are-executable + - id: detect-private-key + - id: check-yaml + - id: check-toml + - id: check-ast + fail_fast: true + - id: debug-statements + - repo: https://github.com/pre-commit/mirrors-clang-format + rev: v15.0.7 # sync with requirements-lint.txt + hooks: + - id: clang-format + exclude: | + (?ix)( + ^.+\.(cu|cuh)$| + ^.+\.json$ + ) + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.14.0 # sync with requirements-lint.txt + hooks: + - id: ruff-check + args: [--fix, --exit-non-zero-on-fix] + - repo: https://github.com/google/yapf + rev: v0.43.0 # sync with requirements-lint.txt + hooks: + - id: yapf + args: [--recursive, --in-place] + - repo: https://github.com/codespell-project/codespell + rev: v2.4.1 # sync with requirements-lint.txt + hooks: + - id: codespell + additional_dependencies: [".[toml]"] + exclude: | + (?x)( + ^.+\.(cpp|hpp|cxx|cc|c|h|cu|cuh)$| + ^.+\.svg$| + ^.*\brequirements\b.*\.txt$ + ) diff --git a/CMakeLists.txt b/CMakeLists.txt index e40b7b027..80e9454fc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -56,7 +56,7 @@ else() # Set default build type to RelWithDebInfo if not provided if(NOT CMAKE_BUILD_TYPE) - # Set default build type to Release if not provided + # Set default build type to Release if not provided set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE) message(STATUS "Setting default build type to ${CMAKE_BUILD_TYPE}") endif() @@ -199,7 +199,7 @@ if(USE_CUDA) set(CUDA_MAJOR_VERSION ${CUDAToolkit_VERSION_MAJOR}) message(STATUS "Setting CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION}") add_compile_definitions(CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION}) - + list(APPEND TILE_LANG_INCLUDES ${CUDAToolkit_INCLUDE_DIRS}) endif(USE_CUDA) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 480f68d6e..e4b45e24b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,14 +2,19 @@ That would be awesome if you want to contribute 
something to TileLang! -- [Contributing](CONTRIBUTING.md#contributing) - - [Reporting Bugs](CONTRIBUTING.md#reporting-bugs) - - [Asking Questions](CONTRIBUTING.md#asking-questions) - - [Submitting Pull Requests](CONTRIBUTING.md#submitting-pull-requests) - - [Repository Setup](CONTRIBUTING.md#repository-setup) - - [Running Tests](CONTRIBUTING.md#running-tests) +### Table of Contents -## Reporting Bugs +- [Report Bugs](#report-bugs) +- [Ask Questions](#ask-questions) +- [Submit Pull Requests](#submit-pull-requests) +- [Setup Development Environment](#setup-development-environment) +- [Install Develop Version](#install-develop-version) +- [Lint Check](#lint-check) +- [Test Locally](#test-locally) +- [Build Wheels](#build-wheels) +- [Documentation](#documentation) + +## Report Bugs If you run into any weird behavior while using TileLang, feel free to open a new issue in this repository! Please run a **search before opening** a new issue, to make sure that someone else hasn't already reported or solved the bug you've found. @@ -18,35 +23,86 @@ Any issue you open must include: - Code snippet that reproduces the bug with a minimal setup. - A clear explanation of what the issue is. - -## Asking Questions +## Ask Questions Please ask questions in issues. -## Submitting Pull Requests +## Submit Pull Requests All pull requests are super welcomed and greatly appreciated! Issues in need of a solution are marked with a [`♥ help`](https://github.com/ianstormtaylor/TileLang/issues?q=is%3Aissue+is%3Aopen+label%3A%22%E2%99%A5+help%22) label if you're looking for somewhere to start. -Please run `./format.sh` before submitting a pull request to make sure that your code is formatted correctly. +If you're new to contributing to TileLang, please follow the guidelines below before submitting a pull request. + +> [!NOTE] +> Please include tests and docs with every pull request if applicable! 
+ +## Setup Development Environment + +Before contributing to TileLang, please follow the instructions below to set up your environment. + +1. Fork TileLang ([fork](https://github.com/tile-ai/tilelang/fork)) on GitHub and clone the repository. + + ```bash + git clone --recurse-submodules git@github.com:YOUR_USERNAME/tilelang.git # use the SSH protocol + cd tilelang + + git remote add upstream git@github.com:tile-ai/tilelang.git + ``` + +2. Set up a development environment: + + ```bash + uv venv --seed .venv # use `python3 -m venv .venv` if you don't have `uv` + + source .venv/bin/activate + python3 -m pip install --upgrade pip setuptools wheel "build[uv]" + uv pip install --requirements requirements-dev.txt + ``` + +3. Set up the [`pre-commit`](https://pre-commit.com) hooks: + + ```bash + pre-commit install --install-hooks + ``` -Please include tests and docs with every pull request! +Then you are ready to rock. Thanks for contributing to TileLang! -## Repository Setup +## Install Develop Version -To run the build, you need to have the TileLang repository cloned to your computer. After that, you need to `cd` into the directory where you cloned it, and install the dependencies with `python`: +To install TileLang in an "editable" mode, run: ```bash -python setup.py install +python3 -m pip install --no-build-isolation --verbose --editable . ``` +in the main directory. This installation is removable by: -## Running Tests +```bash +python3 -m pip uninstall tilelang +``` + +## Lint Check + +To check the linting, run: + +```bash +pre-commit run --all-files +``` + +## Test Locally -To run the tests, start by building the project as described in the [Repository Setup](CONTRIBUTING.md#repository-setup) section. +To run the tests, start by building the project as described in the [Setup Development Environment](#setup-development-environment) section. 
Then you can rerun the tests with: -```text -python -m pytest testing +```bash +python3 -m pytest testing ``` +## Build Wheels + +_TBA_ + +## Documentation + +_TBA_ diff --git a/docs/deeplearning_operators/matmul.md b/docs/deeplearning_operators/matmul.md index 490d731e0..fea036ebe 100644 --- a/docs/deeplearning_operators/matmul.md +++ b/docs/deeplearning_operators/matmul.md @@ -8,7 +8,7 @@ :class: myclass1 myclass2 :name: a-tip-reference - This document is still **experimental** and may be incomplete. + This document is still **experimental** and may be incomplete. Suggestions and improvements are highly encouraged—please submit a PR! ::: @@ -256,4 +256,4 @@ For more advanced usage—including partial lowering, explicitly controlling thr * [BitBLAS](https://github.com/tile-ai/bitblas) * [Triton](https://github.com/openai/triton) * [Cutlass](https://github.com/NVIDIA/cutlass) -* [PyCUDA](https://documen.tician.de/pycuda/) +* [PyCUDA](https://documen.tician.de/pycuda/) diff --git a/examples/deepseek_v32/fp8_lighting_indexer.py b/examples/deepseek_v32/fp8_lighting_indexer.py index 64df55cbb..279dd91c7 100644 --- a/examples/deepseek_v32/fp8_lighting_indexer.py +++ b/examples/deepseek_v32/fp8_lighting_indexer.py @@ -258,6 +258,7 @@ def ref_fp8_mqa_logits(q: torch.Tensor, kv: torch.Tensor, weights: torch.Tensor, cost = mask.sum() return logits, cost + def test_fp8_lighting_indexer(S=4096, SKV=8192, H=32, HKV=1, D=64, kv_stride=1): q = torch.randn(S, H, D, device="cuda", dtype=torch.bfloat16).to(torch.bfloat16) kv = torch.randn(SKV, D, device="cuda", dtype=torch.bfloat16).to(torch.bfloat16) @@ -302,5 +303,6 @@ def logits_fn(): print(f"logits_tflops: {logits_tflops}, logits_ms: {logits_ms}") print(f"cost_ref: {cost_ref}") + if __name__ == "__main__": test_fp8_lighting_indexer() diff --git a/pyproject.toml b/pyproject.toml index 7193341dd..1d3755099 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,11 @@ skip = [ ".venv" ] +[tool.ruff] +target-version = "py38" 
+line-length = 100 +output-format = "full" + [tool.ruff.lint] select = [ # pycodestyle @@ -48,13 +53,17 @@ ignore = [ "E741", # line too long "E501", + # if-else-block instead of ternary + "SIM108", # key in dict.keys() "SIM118", # memory leaks "B019", + # zip without explicit strict + "B905", # No such file or directory "E902", ] [tool.ruff.lint.per-file-ignores] "3rdparty/**/*" = ["ALL"] -"examples/deepseek_v32/inference/**/*" = ["ALL"] \ No newline at end of file +"examples/deepseek_v32/inference/**/*" = ["ALL"] diff --git a/requirements-lint.txt b/requirements-lint.txt index 46737db5d..8025d3ce2 100644 --- a/requirements-lint.txt +++ b/requirements-lint.txt @@ -1,8 +1,7 @@ # formatting -yapf==0.40.2 -toml==0.10.2 -tomli==2.0.1 -ruff==0.6.5 -codespell==2.3.0 +pre-commit +yapf==0.43.0 +ruff==0.14.0 +codespell[toml]==2.4.1 clang-format==15.0.7 clang-tidy==18.1.8 diff --git a/setup.py b/setup.py index fc9a5ca59..d4c3152af 100644 --- a/setup.py +++ b/setup.py @@ -417,7 +417,7 @@ def patch_libs(libpath): subprocess.run([patchelf_path, '--set-rpath', '$ORIGIN', libpath]) -class TileLangBuilPydCommand(build_py): +class TileLangBuildPyCommand(build_py): """Customized setuptools install command - builds TVM after setting up LLVM.""" def run(self): @@ -643,7 +643,7 @@ def __init__(self, name, sourcedir=""): self.sourcedir = os.path.abspath(sourcedir) -class TilelangExtensionBuild(build_ext): +class TileLangExtensionBuild(build_ext): """ Custom build_ext command for CMake-based projects. 
@@ -929,8 +929,8 @@ def build_cmake(self, ext): CythonExtension("TileLangCython", sourcedir="."), ], cmdclass={ - "build_py": TileLangBuilPydCommand, + "build_py": TileLangBuildPyCommand, "sdist": TileLangSdistCommand, - "build_ext": TilelangExtensionBuild, + "build_ext": TileLangExtensionBuild, }, ) diff --git a/src/layout/gemm_layouts.cc b/src/layout/gemm_layouts.cc index 7be8afe8c..1fc07ae66 100644 --- a/src/layout/gemm_layouts.cc +++ b/src/layout/gemm_layouts.cc @@ -588,7 +588,7 @@ Layout makeGemmVoltaABLayout(int stride, int continuous, bool is_a, // ref: // https://github.com/nvidia/cutlass/blob/ad7b2f5e84fcfa124cb02b91d5bd26d238c0459e/include/cutlass/layout/tensor_op_multiplicand_sm75.h#L54 -// Althought the four settings (T or NT) used distinct layouts in CUTLASS, they +// Although the four settings (T or NT) used distinct layouts in CUTLASS, they // appeared to result in the same mem layout Layout makeTensorOpMultiplicand(int mat_stride, int mat_continuous, int elementsize, int crosswise) { diff --git a/src/op/parallel.cc b/src/op/parallel.cc index 9f1d92148..2a1135d7e 100644 --- a/src/op/parallel.cc +++ b/src/op/parallel.cc @@ -215,9 +215,9 @@ LayoutMap ParallelOpNode::InferLayout(const LayoutInferArgs &T, return {}; if (level == InferLevel::kStrict) { LayoutMap results; - // Deduce buffers that shoule be complicated replicated. + // Deduce buffers that should be completely replicated. // For example: - // for i in T.Parllel(m): + // for i in T.Parallel(m): // fragment[0] = x[i] // then fragment[0] must be replicated on all threads. for (const auto &[buffer, indices] : indice_map_) { diff --git a/src/target/codegen_cuda.cc b/src/target/codegen_cuda.cc index 85c3dc4ae..728771d21 100644 --- a/src/target/codegen_cuda.cc +++ b/src/target/codegen_cuda.cc @@ -2210,7 +2210,7 @@ void CodeGenTileLangCUDA::VisitExpr_(const BufferLoadNode *op, DataType element_dtype = op->buffer->dtype; int lanes = op->dtype.lanes(); - // delcare type. + // declare type. 
if (value_dtype.lanes() == element_dtype.lanes()) { std::string ref = GetBufferRef(op->dtype, op->buffer.get(), index); HandleVolatileLoads(ref, op, os); diff --git a/src/target/ptx.h b/src/target/ptx.h index dffd6e351..68d5b04a3 100644 --- a/src/target/ptx.h +++ b/src/target/ptx.h @@ -258,7 +258,7 @@ std::string PrintArriveBarrierAsm(const std::string &barrier); * \brief Print ptx barrier arrival with expect tx operation using * mbarrier.arrive.expect_tx \param barrier: The name of the barrier in shared * memory. \param byte_count: Increases the tx count of the mbarrier object to - * track completion of addtional async transactions. + * track completion of additional async transactions. */ std::string PrintArriveBarrierExpectTxAsm(const std::string &barrier, const std::string &byte_count); diff --git a/src/transform/inject_assumes.cc b/src/transform/inject_assumes.cc index a2ddfc4a0..d4c8a53c8 100644 --- a/src/transform/inject_assumes.cc +++ b/src/transform/inject_assumes.cc @@ -33,8 +33,8 @@ class AssumeInjector : public tvm::tir::StmtExprMutator { }; tvm::StructuralHash sh; tvm::StructuralEqual se; - // grouped by expr, since the amount of varidic shape symbols is usualy much - // smaller than buffer + // grouped by expr, since the amount of variadic shape symbols is usually + // much smaller than buffer std::vector items; // hash => index in items std::unordered_map> buckets; diff --git a/src/transform/loop_vectorize_dynamic.cc b/src/transform/loop_vectorize_dynamic.cc index 0756fce43..d02582726 100644 --- a/src/transform/loop_vectorize_dynamic.cc +++ b/src/transform/loop_vectorize_dynamic.cc @@ -243,9 +243,9 @@ class VectorizedBodyMutator : public StmtExprMutator { std::vector conditions_; }; -class VectorizedConditionExtracter : public StmtExprVisitor { +class VectorizedConditionExtractor : public StmtExprVisitor { public: - VectorizedConditionExtracter() = default; + VectorizedConditionExtractor() = default; std::vector GetConditions(const Stmt &body) { 
this->VisitStmt(body); return conditions_; @@ -268,6 +268,9 @@ class VectorizedConditionExtracter : public StmtExprVisitor { std::vector conditions_; }; +// backward-compatibility: extracter -> extractor +using VectorizedConditionExtracter = VectorizedConditionExtractor; + class NestedLoopChecker : public StmtExprVisitor { public: NestedLoopChecker() : loop_num_(0) {} @@ -391,8 +394,8 @@ class VectorizeRewriterDynamic : public StmtExprMutator { vmap.Set(fnode->loop_var, outer_var * vector_size_ + inner_var); Stmt body = Substitute(fnode->body, vmap); - VectorizedConditionExtracter extracter; - std::vector conditions = extracter.GetConditions(body); + VectorizedConditionExtractor extractor; + std::vector conditions = extractor.GetConditions(body); VectorizedConditionMutator condition_mutator(inner_var, vector_size_); diff --git a/tilelang/jit/adapter/libgen.py b/tilelang/jit/adapter/libgen.py index 89f127f0c..5d1143a67 100644 --- a/tilelang/jit/adapter/libgen.py +++ b/tilelang/jit/adapter/libgen.py @@ -64,7 +64,7 @@ def compile_lib(self, timeout: float = None): verbose = self.verbose if is_cuda_target(target): from tilelang.env import CUTLASS_INCLUDE_DIR - src = tempfile.NamedTemporaryFile(mode="w", suffix=".cu", delete=False) + src = tempfile.NamedTemporaryFile(mode="w", suffix=".cu", delete=False) # noqa: SIM115 target_arch = get_target_arch(get_target_compute_version(target)) libpath = src.name.replace(".cu", ".so") @@ -111,7 +111,7 @@ def compile_lib(self, timeout: float = None): elif is_hip_target(target): from tilelang.env import COMPOSABLE_KERNEL_INCLUDE_DIR - src = tempfile.NamedTemporaryFile(mode="w", suffix=".cpp", delete=False) + src = tempfile.NamedTemporaryFile(mode="w", suffix=".cpp", delete=False) # noqa: SIM115 libpath = src.name.replace(".cpp", ".so") rocm_path = find_rocm_path() arch = get_rocm_arch(rocm_path) @@ -128,7 +128,7 @@ def compile_lib(self, timeout: float = None): ] elif is_cpu_target(target): from tilelang.contrib.cc import 
get_cplus_compiler - src = tempfile.NamedTemporaryFile(mode="w", suffix=".cpp", delete=False) + src = tempfile.NamedTemporaryFile(mode="w", suffix=".cpp", delete=False) # noqa: SIM115 libpath = src.name.replace(".cpp", ".so") command = [get_cplus_compiler(), "-std=c++17", "-fPIC", "-shared", src.name] @@ -228,7 +228,7 @@ def compile_lib(self, timeout: float = None): verbose = self.verbose if is_cuda_target(target): from tilelang.env import (CUDA_HOME, CUTLASS_INCLUDE_DIR, TILELANG_TEMPLATE_PATH) - src = tempfile.NamedTemporaryFile(mode="w", suffix=".cu", delete=False) + src = tempfile.NamedTemporaryFile(mode="w", suffix=".cu", delete=False) # noqa: SIM115 libpath = src.name.replace(".cu", ".cubin") project_root = osp.join(osp.dirname(__file__), "..", "..")