Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
383e49f
feat(rocm): register VLLM_ROCM_USE_AITER_TRITON_FUSED_RMSNORM_FP4_QUA…
shantipriya-amd Jun 3, 2026
6384b73
feat(rocm): rename to FUSION_* namespace, wire _aiter_ops F2/F3, add …
shantipriya-amd Jun 4, 2026
b2b117c
fix(rocm): correct q_out docstring shape in fused_rope_and_mla_kv_cac…
shantipriya-amd Jun 4, 2026
360f4d7
test(rocm): TC-1.x–TC-4.x fusion flag tests for F2/F3 dispatch
shantipriya-amd Jun 4, 2026
145ed23
test(rocm): extend TC-3.x to cover DeepSeek V2-Lite (num_q_heads=16)
shantipriya-amd Jun 4, 2026
daaf6a8
feat(rocm): add MXFP4 fusion patterns + ops for RMSNorm+MXFP4-quant (F2)
shantipriya-amd Jun 4, 2026
7e9ffd4
refactor(rocm): remove AR+MXFP4 fusion ops — defer to follow-on PR
shantipriya-amd Jun 4, 2026
2cc1fa8
test(rocm): remove AR+MXFP4 test stubs from test files
shantipriya-amd Jun 4, 2026
bf0d6ed
fix(mxfp4): add _pattern_replacements tracking, INFO logging, fix may…
shantipriya-amd Jun 4, 2026
716ca3d
fix(tests): guard _C ops against source-only runs; skip PR3-only disp…
shantipriya-amd Jun 5, 2026
9001e42
fix(fusion): guard group-quant patterns against missing per_token_gro…
shantipriya-amd Jun 5, 2026
5a42854
fix(fusion): guard FP8-group patterns in rocm_aiter_fusion against mi…
shantipriya-amd Jun 5, 2026
5bf7f3f
refactor(rocm): remove F2/F3 env vars; auto-enable via feature probes
shantipriya-amd Jun 8, 2026
7bb185b
fix(test): rewrite test_mxfp4_patterns_fire_on_model to use torch.com…
shantipriya-amd Jun 8, 2026
c1207e5
fix(test): address code review issues in F2/F3 test files
shantipriya-amd Jun 8, 2026
5f817d5
fix(_aiter_ops): use getattr for VLLM_ROCM_USE_AITER_LINEAR_HIPBMM (v…
shantipriya-amd Jun 8, 2026
4a8a5ed
test(f3): add test_f3_fused_replaces_two_ops — dispatch benefit verif…
shantipriya-amd Jun 8, 2026
b2baf91
fix(test): correct test_f3_fused_replaces_two_ops docstring
shantipriya-amd Jun 8, 2026
99331d9
test(f2): add negative assertion — standalone quant absent after fusion
shantipriya-amd Jun 8, 2026
f0a02e2
tests: add check_not_in_after_ops to TestBackend and test_mxfp4_patte…
shantipriya-amd Jun 8, 2026
c2d8708
docs: add F3 TPOT baseline vs F3-on comparison plot
shantipriya-amd Jun 8, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added docs/assets/f3_tpot_comparison.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,9 @@ arange = "arange"
thw = "thw"
subtile = "subtile"
HSA = "HSA"
# n_occurences is the real column name emitted by uplift-plan CSV output;
# fixing the spelling here would break CSV key lookups in tests
occurences = "occurences"
setp = "setp"
CPY = "CPY"
thr = "thr"
Expand Down
11 changes: 11 additions & 0 deletions tests/compile/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,17 @@ def check_after_ops(self, ops: Sequence[OpOverload | OpOverloadPacket]):
assert num_pre == 0, f"Unexpected op {op.name()} in pre-pass graph"
assert num_post > 0, f"Op {op.name()} not found in post-pass graph"

def check_not_in_after_ops(
    self, ops: Sequence[OpOverload | OpOverloadPacket]
):
    """Assert that none of ``ops`` appear in the post-pass graph.

    Each op is searched for in ``self.graph_post_pass`` with
    ``find_op_nodes``. The first op that still has one or more matching
    nodes fails the assertion, and the message reports how many were found.
    """
    for target in ops:
        leftovers = list(find_op_nodes(target, self.graph_post_pass))
        found = len(leftovers)
        # The message is only built on failure, so ``target.name()`` is not
        # called for ops that were correctly removed.
        assert found == 0, (
            f"Op {target.name()} should be absent from post-pass graph "
            f"but found {found} node(s)"
        )

def op_count(self, op: OpOverload | OpOverloadPacket, before=False) -> int:
    """Return how many nodes for ``op`` are found in one of the graphs.

    Searches ``self.graph_pre_pass`` when ``before`` is truthy and
    ``self.graph_post_pass`` otherwise, using ``find_op_nodes``.
    """
    if before:
        selected_graph = self.graph_pre_pass
    else:
        selected_graph = self.graph_post_pass
    # Count matches one at a time instead of building a throwaway list.
    return sum(1 for _ in find_op_nodes(op, selected_graph))
Expand Down
Loading
Loading