diff --git a/.clang-format b/.clang-format
new file mode 100644
index 000000000..964712a78
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,8 @@
+---
+BasedOnStyle: LLVM
+UseTab: Never
+IndentWidth: 2
+ColumnLimit: 80
+
+Language: Cpp
+Standard: c++17
diff --git a/.editorconfig b/.editorconfig
index 10ac9729a..a9e8a6df4 100644
--- a/.editorconfig
+++ b/.editorconfig
@@ -14,7 +14,10 @@ insert_final_newline = true
 indent_size = 4
 
 [*.{cpp,hpp,cxx,cc,c,h,cu,cuh}]
-indent_size = 4
+indent_size = 2
+
+[{*.cmake,CMakeLists.txt}]
+indent_size = 2
 
 [*.{yaml,yml}]
 indent_size = 2
diff --git a/.gitattributes b/.gitattributes
index 2f6d49472..bbb14db37 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1 +1,10 @@
+* text=auto eol=lf
+*.bat text eol=crlf
+# text=auto skips files Git detects as binary; the types below are always binary
+*.svg binary
+*.jpg binary
+*.jpeg binary
+*.png binary
+*.gif binary
+
 *.h linguist-language=C++
diff --git a/.gitignore b/.gitignore
index 5bcb6f773..eb96b1622 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,7 +26,14 @@ nnfusion.tar.gz
 # makeenv and test intermediate files
 tmp/
 
+.env
+.envrc
+.venv
+env/
 venv/
+ENV/
+env.bak/
+venv.bak/
 .vscode/
 .vs/
 
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 000000000..2846e58ef
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,60 @@
+# See https://pre-commit.com for more information
+# See https://pre-commit.com/hooks.html for more hooks
+ci:
+  autofix_prs: true
+  autofix_commit_msg: "[Lint]: [pre-commit.ci] auto fixes [...]"
+  autoupdate_commit_msg: "[CI] [pre-commit.ci] autoupdate"
+  autoupdate_schedule: monthly
+default_stages: [pre-commit, pre-push, manual]
+exclude: '^(build|3rdparty)/.*$'  # exclude build and 3rdparty directories
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v6.0.0
+    hooks:
+      - id: check-symlinks
+      - id: destroyed-symlinks
+      # FIXME: enable these hooks
+      # - id: trailing-whitespace
+      # - id: end-of-file-fixer
+      - id: check-added-large-files
+      - id: check-merge-conflict
+        fail_fast: true
+      # FIXME: enable these hooks
+      # - id: check-executables-have-shebangs
+      # - id: check-shebang-scripts-are-executable
+      - id: detect-private-key
+      - id: check-yaml
+      - id: check-toml
+      - id: check-ast
+        fail_fast: true
+      - id: debug-statements
+  - repo: https://github.com/pre-commit/mirrors-clang-format
+    rev: v15.0.7  # sync with requirements-lint.txt
+    hooks:
+      - id: clang-format
+        exclude: |
+          (?ix)(
+            ^.+\.(cu|cuh)$|
+            ^.+\.json$
+          )
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.14.0  # sync with requirements-lint.txt
+    hooks:
+      - id: ruff-check
+        args: [--fix, --exit-non-zero-on-fix]
+  - repo: https://github.com/google/yapf
+    rev: v0.43.0  # sync with requirements-lint.txt
+    hooks:
+      - id: yapf
+        args: [--recursive, --in-place]
+  - repo: https://github.com/codespell-project/codespell
+    rev: v2.4.1  # sync with requirements-lint.txt
+    hooks:
+      - id: codespell
+        additional_dependencies: [".[toml]"]
+        exclude: |
+          (?x)(
+            ^.+\.(cpp|hpp|cxx|cc|c|h|cu|cuh)$|
+            ^.+\.svg$|
+            ^.*\brequirements\b.*\.txt$
+          )
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e40b7b027..80e9454fc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -56,7 +56,7 @@ else()
 
   # Set default build type to RelWithDebInfo if not provided
   if(NOT CMAKE_BUILD_TYPE)
-  # Set default build type to Release if not provided
+    # Set default build type to Release if not provided
     set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
     message(STATUS "Setting default build type to ${CMAKE_BUILD_TYPE}")
   endif()
@@ -199,7 +199,7 @@ if(USE_CUDA)
   set(CUDA_MAJOR_VERSION ${CUDAToolkit_VERSION_MAJOR})
   message(STATUS "Setting CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION}")
   add_compile_definitions(CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION})
-  
+
   list(APPEND TILE_LANG_INCLUDES ${CUDAToolkit_INCLUDE_DIRS})
 endif(USE_CUDA)
 
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 480f68d6e..e4b45e24b 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -2,14 +2,19 @@
 
 That would be awesome if you want to contribute something to TileLang!
 
-- [Contributing](CONTRIBUTING.md#contributing)
-  - [Reporting Bugs](CONTRIBUTING.md#reporting-bugs)
-  - [Asking Questions](CONTRIBUTING.md#asking-questions)
-  - [Submitting Pull Requests](CONTRIBUTING.md#submitting-pull-requests)
-  - [Repository Setup](CONTRIBUTING.md#repository-setup)
-  - [Running Tests](CONTRIBUTING.md#running-tests)
+### Table of Contents  <!-- omit in toc --> <!-- markdownlint-disable heading-increment -->
 
-## Reporting Bugs
+- [Report Bugs](#report-bugs)
+- [Ask Questions](#ask-questions)
+- [Submit Pull Requests](#submit-pull-requests)
+- [Setup Development Environment](#setup-development-environment)
+- [Install Develop Version](#install-develop-version)
+- [Lint Check](#lint-check)
+- [Test Locally](#test-locally)
+- [Build Wheels](#build-wheels)
+- [Documentation](#documentation)
+
+## Report Bugs
 
 If you run into any weird behavior while using TileLang, feel free to open a new issue in this repository! Please run a **search before opening** a new issue, to make sure that someone else hasn't already reported or solved the bug you've found.
 
@@ -18,35 +23,86 @@ Any issue you open must include:
 - Code snippet that reproduces the bug with a minimal setup.
 - A clear explanation of what the issue is.
 
-
-## Asking Questions
+## Ask Questions
 
 Please ask questions in issues.
 
-## Submitting Pull Requests
+## Submit Pull Requests
 
 All pull requests are super welcomed and greatly appreciated! Issues in need of a solution are marked with a [`♥ help`](https://github.com/ianstormtaylor/TileLang/issues?q=is%3Aissue+is%3Aopen+label%3A%22%E2%99%A5+help%22) label if you're looking for somewhere to start.
 
-Please run `./format.sh` before submitting a pull request to make sure that your code is formatted correctly.
+If you're new to contributing to TileLang, please follow the guidelines below before submitting a pull request.
+
+> [!NOTE]
+> Please include tests and docs with every pull request if applicable!
+
+## Setup Development Environment
+
+Before contributing to TileLang, please follow the instructions below to set up your development environment.
+
+1. Fork TileLang ([fork](https://github.com/tile-ai/tilelang/fork)) on GitHub and clone the repository.
+
+    ```bash
+    git clone --recurse-submodules git@github.com:<your username>/tilelang.git  # use the SSH protocol
+    cd tilelang
+
+    git remote add upstream git@github.com:tile-ai/tilelang.git
+    ```
+
+2. Set up a development environment:
+
+    ```bash
+    uv venv --seed .venv  # use `python3 -m venv .venv` if you don't have `uv`
+
+    source .venv/bin/activate
+    python3 -m pip install --upgrade pip setuptools wheel "build[uv]"
+    uv pip install --requirements requirements-dev.txt
+    ```
+
+3. Set up the [`pre-commit`](https://pre-commit.com) hooks:
+
+    ```bash
+    pre-commit install --install-hooks
+    ```
 
-Please include tests and docs with every pull request!
+Then you are ready to rock. Thanks for contributing to TileLang!
 
-## Repository Setup
+## Install Develop Version
 
-To run the build, you need to have the TileLang repository cloned to your computer. After that, you need to `cd` into the directory where you cloned it, and install the dependencies with `python`:
+To install TileLang in an "editable" mode, run:
 
 ```bash
-python setup.py install
+python3 -m pip install --no-build-isolation --verbose --editable .
 ```
 
+from the repository root. To remove this installation, run:
 
-## Running Tests
+```bash
+python3 -m pip uninstall tilelang
+```
+
+## Lint Check
+
+To check the linting, run:
+
+```bash
+pre-commit run --all-files
+```
+
+## Test Locally
 
-To run the tests, start by building the project as described in the [Repository Setup](CONTRIBUTING.md#repository-setup) section.
+To run the tests, first set up the environment and install TileLang as described in the [Setup Development Environment](#setup-development-environment) and [Install Develop Version](#install-develop-version) sections.
 
 Then you can rerun the tests with:
 
-```text
-python -m pytest testing
+```bash
+python3 -m pytest testing
 ```
 
+## Build Wheels
+
+_TBA_
+
+## Documentation
+
+_TBA_
diff --git a/docs/deeplearning_operators/matmul.md b/docs/deeplearning_operators/matmul.md
index 490d731e0..fea036ebe 100644
--- a/docs/deeplearning_operators/matmul.md
+++ b/docs/deeplearning_operators/matmul.md
@@ -8,7 +8,7 @@
 :class: myclass1 myclass2
 :name: a-tip-reference
 
-   This document is still **experimental** and may be incomplete.  
+   This document is still **experimental** and may be incomplete.
    Suggestions and improvements are highly encouraged—please submit a PR!
 :::
 
@@ -256,4 +256,4 @@ For more advanced usage—including partial lowering, explicitly controlling thr
 * [BitBLAS](https://github.com/tile-ai/bitblas)
 * [Triton](https://github.com/openai/triton)
 * [Cutlass](https://github.com/NVIDIA/cutlass)
-* [PyCUDA](https://documen.tician.de/pycuda/)
+* [PyCUDA](https://documen.tician.de/pycuda/)  <!-- codespell:ignore -->
diff --git a/examples/deepseek_v32/fp8_lighting_indexer.py b/examples/deepseek_v32/fp8_lighting_indexer.py
index 64df55cbb..279dd91c7 100644
--- a/examples/deepseek_v32/fp8_lighting_indexer.py
+++ b/examples/deepseek_v32/fp8_lighting_indexer.py
@@ -258,6 +258,7 @@ def ref_fp8_mqa_logits(q: torch.Tensor, kv: torch.Tensor, weights: torch.Tensor,
     cost = mask.sum()
     return logits, cost
 
+
 def test_fp8_lighting_indexer(S=4096, SKV=8192, H=32, HKV=1, D=64, kv_stride=1):
     q = torch.randn(S, H, D, device="cuda", dtype=torch.bfloat16).to(torch.bfloat16)
     kv = torch.randn(SKV, D, device="cuda", dtype=torch.bfloat16).to(torch.bfloat16)
@@ -302,5 +303,6 @@ def logits_fn():
     print(f"logits_tflops: {logits_tflops}, logits_ms: {logits_ms}")
     print(f"cost_ref: {cost_ref}")
 
+
 if __name__ == "__main__":
     test_fp8_lighting_indexer()
diff --git a/pyproject.toml b/pyproject.toml
index 7193341dd..1d3755099 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,6 +24,11 @@ skip = [
     ".venv"
 ]
 
+[tool.ruff]
+target-version = "py38"
+line-length = 100
+output-format = "full"
+
 [tool.ruff.lint]
 select = [
     # pycodestyle
@@ -48,13 +53,17 @@ ignore = [
     "E741",
     # line too long
     "E501",
+    # if-else-block instead of ternary
+    "SIM108",
     # key in dict.keys()
     "SIM118",
     # memory leaks
     "B019",
+    # zip without explicit strict
+    "B905",
     # No such file or directory
     "E902",
 ]
 [tool.ruff.lint.per-file-ignores]
 "3rdparty/**/*" = ["ALL"]
-"examples/deepseek_v32/inference/**/*" = ["ALL"]
\ No newline at end of file
+"examples/deepseek_v32/inference/**/*" = ["ALL"]
diff --git a/requirements-lint.txt b/requirements-lint.txt
index 46737db5d..8025d3ce2 100644
--- a/requirements-lint.txt
+++ b/requirements-lint.txt
@@ -1,8 +1,7 @@
 # formatting
-yapf==0.40.2
-toml==0.10.2
-tomli==2.0.1
-ruff==0.6.5
-codespell==2.3.0
+pre-commit
+yapf==0.43.0
+ruff==0.14.0
+codespell[toml]==2.4.1
 clang-format==15.0.7
 clang-tidy==18.1.8
diff --git a/setup.py b/setup.py
index fc9a5ca59..d4c3152af 100644
--- a/setup.py
+++ b/setup.py
@@ -417,7 +417,7 @@ def patch_libs(libpath):
     subprocess.run([patchelf_path, '--set-rpath', '$ORIGIN', libpath])
 
 
-class TileLangBuilPydCommand(build_py):
+class TileLangBuildPyCommand(build_py):
     """Customized setuptools install command - builds TVM after setting up LLVM."""
 
     def run(self):
@@ -643,7 +643,7 @@ def __init__(self, name, sourcedir=""):
         self.sourcedir = os.path.abspath(sourcedir)
 
 
-class TilelangExtensionBuild(build_ext):
+class TileLangExtensionBuild(build_ext):
     """
     Custom build_ext command for CMake-based projects.
 
@@ -929,8 +929,8 @@ def build_cmake(self, ext):
         CythonExtension("TileLangCython", sourcedir="."),
     ],
     cmdclass={
-        "build_py": TileLangBuilPydCommand,
+        "build_py": TileLangBuildPyCommand,
         "sdist": TileLangSdistCommand,
-        "build_ext": TilelangExtensionBuild,
+        "build_ext": TileLangExtensionBuild,
     },
 )
diff --git a/src/layout/gemm_layouts.cc b/src/layout/gemm_layouts.cc
index 7be8afe8c..1fc07ae66 100644
--- a/src/layout/gemm_layouts.cc
+++ b/src/layout/gemm_layouts.cc
@@ -588,7 +588,7 @@ Layout makeGemmVoltaABLayout(int stride, int continuous, bool is_a,
 
 // ref:
 // https://github.com/nvidia/cutlass/blob/ad7b2f5e84fcfa124cb02b91d5bd26d238c0459e/include/cutlass/layout/tensor_op_multiplicand_sm75.h#L54
-// Althought the four settings (T or NT) used distinct layouts in CUTLASS, they
+// Although the four settings (T or NT) used distinct layouts in CUTLASS, they
 // appeared to result in the same mem layout
 Layout makeTensorOpMultiplicand(int mat_stride, int mat_continuous,
                                 int elementsize, int crosswise) {
diff --git a/src/op/parallel.cc b/src/op/parallel.cc
index 9f1d92148..2a1135d7e 100644
--- a/src/op/parallel.cc
+++ b/src/op/parallel.cc
@@ -215,9 +215,9 @@ LayoutMap ParallelOpNode::InferLayout(const LayoutInferArgs &T,
     return {};
   if (level == InferLevel::kStrict) {
     LayoutMap results;
-    // Deduce buffers that shoule be complicated replicated.
+    // Deduce buffers that should be completely replicated.
     // For example:
-    // for i in T.Parllel(m):
+    // for i in T.Parallel(m):
     //   fragment[0] = x[i]
     // then fragment[0] must be replicated on all threads.
     for (const auto &[buffer, indices] : indice_map_) {
diff --git a/src/target/codegen_cuda.cc b/src/target/codegen_cuda.cc
index 85c3dc4ae..728771d21 100644
--- a/src/target/codegen_cuda.cc
+++ b/src/target/codegen_cuda.cc
@@ -2210,7 +2210,7 @@ void CodeGenTileLangCUDA::VisitExpr_(const BufferLoadNode *op,
   DataType element_dtype = op->buffer->dtype;
 
   int lanes = op->dtype.lanes();
-  // delcare type.
+  // declare type.
   if (value_dtype.lanes() == element_dtype.lanes()) {
     std::string ref = GetBufferRef(op->dtype, op->buffer.get(), index);
     HandleVolatileLoads(ref, op, os);
diff --git a/src/target/ptx.h b/src/target/ptx.h
index dffd6e351..68d5b04a3 100644
--- a/src/target/ptx.h
+++ b/src/target/ptx.h
@@ -258,7 +258,7 @@ std::string PrintArriveBarrierAsm(const std::string &barrier);
  * \brief Print ptx barrier arrival with expect tx operation using
  * mbarrier.arrive.expect_tx \param barrier: The name of the barrier in shared
  * memory. \param byte_count: Increases the tx count of the mbarrier object to
- * track completion of addtional async transactions.
+ * track completion of additional async transactions.
  */
 std::string PrintArriveBarrierExpectTxAsm(const std::string &barrier,
                                           const std::string &byte_count);
diff --git a/src/transform/inject_assumes.cc b/src/transform/inject_assumes.cc
index a2ddfc4a0..d4c8a53c8 100644
--- a/src/transform/inject_assumes.cc
+++ b/src/transform/inject_assumes.cc
@@ -33,8 +33,8 @@ class AssumeInjector : public tvm::tir::StmtExprMutator {
     };
     tvm::StructuralHash sh;
     tvm::StructuralEqual se;
-    // grouped by expr, since the amount of varidic shape symbols is usualy much
-    // smaller than buffer
+    // grouped by expr, since the number of variadic shape symbols is usually
+    // much smaller than the number of buffers
     std::vector<Item> items;
     // hash => index in items
     std::unordered_map<size_t, std::vector<size_t>> buckets;
diff --git a/src/transform/loop_vectorize_dynamic.cc b/src/transform/loop_vectorize_dynamic.cc
index 0756fce43..d02582726 100644
--- a/src/transform/loop_vectorize_dynamic.cc
+++ b/src/transform/loop_vectorize_dynamic.cc
@@ -243,9 +243,9 @@ class VectorizedBodyMutator : public StmtExprMutator {
   std::vector<PrimExpr> conditions_;
 };
 
-class VectorizedConditionExtracter : public StmtExprVisitor {
+class VectorizedConditionExtractor : public StmtExprVisitor {
 public:
-  VectorizedConditionExtracter() = default;
+  VectorizedConditionExtractor() = default;
   std::vector<PrimExpr> GetConditions(const Stmt &body) {
     this->VisitStmt(body);
     return conditions_;
@@ -268,6 +268,9 @@ class VectorizedConditionExtracter : public StmtExprVisitor {
   std::vector<PrimExpr> conditions_;
 };
 
+// backward-compatibility: extracter -> extractor
+using VectorizedConditionExtracter = VectorizedConditionExtractor;
+
 class NestedLoopChecker : public StmtExprVisitor {
 public:
   NestedLoopChecker() : loop_num_(0) {}
@@ -391,8 +394,8 @@ class VectorizeRewriterDynamic : public StmtExprMutator {
     vmap.Set(fnode->loop_var, outer_var * vector_size_ + inner_var);
     Stmt body = Substitute(fnode->body, vmap);
 
-    VectorizedConditionExtracter extracter;
-    std::vector<PrimExpr> conditions = extracter.GetConditions(body);
+    VectorizedConditionExtractor extractor;
+    std::vector<PrimExpr> conditions = extractor.GetConditions(body);
 
     VectorizedConditionMutator condition_mutator(inner_var, vector_size_);
 
diff --git a/tilelang/jit/adapter/libgen.py b/tilelang/jit/adapter/libgen.py
index 89f127f0c..5d1143a67 100644
--- a/tilelang/jit/adapter/libgen.py
+++ b/tilelang/jit/adapter/libgen.py
@@ -64,7 +64,7 @@ def compile_lib(self, timeout: float = None):
         verbose = self.verbose
         if is_cuda_target(target):
             from tilelang.env import CUTLASS_INCLUDE_DIR
-            src = tempfile.NamedTemporaryFile(mode="w", suffix=".cu", delete=False)
+            src = tempfile.NamedTemporaryFile(mode="w", suffix=".cu", delete=False)  # noqa: SIM115
             target_arch = get_target_arch(get_target_compute_version(target))
             libpath = src.name.replace(".cu", ".so")
 
@@ -111,7 +111,7 @@ def compile_lib(self, timeout: float = None):
 
         elif is_hip_target(target):
             from tilelang.env import COMPOSABLE_KERNEL_INCLUDE_DIR
-            src = tempfile.NamedTemporaryFile(mode="w", suffix=".cpp", delete=False)
+            src = tempfile.NamedTemporaryFile(mode="w", suffix=".cpp", delete=False)  # noqa: SIM115
             libpath = src.name.replace(".cpp", ".so")
             rocm_path = find_rocm_path()
             arch = get_rocm_arch(rocm_path)
@@ -128,7 +128,7 @@ def compile_lib(self, timeout: float = None):
             ]
         elif is_cpu_target(target):
             from tilelang.contrib.cc import get_cplus_compiler
-            src = tempfile.NamedTemporaryFile(mode="w", suffix=".cpp", delete=False)
+            src = tempfile.NamedTemporaryFile(mode="w", suffix=".cpp", delete=False)  # noqa: SIM115
             libpath = src.name.replace(".cpp", ".so")
 
             command = [get_cplus_compiler(), "-std=c++17", "-fPIC", "-shared", src.name]
@@ -228,7 +228,7 @@ def compile_lib(self, timeout: float = None):
         verbose = self.verbose
         if is_cuda_target(target):
             from tilelang.env import (CUDA_HOME, CUTLASS_INCLUDE_DIR, TILELANG_TEMPLATE_PATH)
-            src = tempfile.NamedTemporaryFile(mode="w", suffix=".cu", delete=False)
+            src = tempfile.NamedTemporaryFile(mode="w", suffix=".cu", delete=False)  # noqa: SIM115
             libpath = src.name.replace(".cu", ".cubin")
 
             project_root = osp.join(osp.dirname(__file__), "..", "..")