-
-
Notifications
You must be signed in to change notification settings - Fork 11.6k
[Attention] Implement universal BACKEND_MAP #25900
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 9 commits
3ba013e
55ad364
950f955
1e2fbce
f7a2fd0
2602f5c
8f3d4d9
e2dbb43
21a0e10
715f965
ff18c0b
7e31b74
825fcee
40c7ea3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,13 +3,16 @@ | |
| """Attention backend registry""" | ||
|
|
||
| import enum | ||
| from typing import Optional | ||
|
|
||
| from vllm.utils import resolve_obj_by_qualname | ||
|
|
||
|
|
||
| class _Backend(enum.Enum): | ||
| FLASH_ATTN = enum.auto() | ||
| TRITON_ATTN = enum.auto() | ||
| XFORMERS = enum.auto() | ||
| ROCM_FLASH = enum.auto() | ||
| ROCM_ATTN = enum.auto() | ||
| ROCM_AITER_MLA = enum.auto() | ||
| ROCM_AITER_FA = enum.auto() # used for ViT attn backend | ||
| TORCH_SDPA = enum.auto() | ||
|
|
@@ -24,5 +27,64 @@ class _Backend(enum.Enum): | |
| NO_ATTENTION = enum.auto() | ||
| FLEX_ATTENTION = enum.auto() | ||
| TREE_ATTN = enum.auto() | ||
| ROCM_ATTN = enum.auto() | ||
| ROCM_AITER_UNIFIED_ATTN = enum.auto() | ||
|
|
||
|
|
||
# Registry filled in by `register_attn_backend`: maps a `_Backend` member to
# the qualified-name string of the class registered for it.
BACKEND_MAP = dict()
|
|
||
|
|
||
def register_attn_backend(backend: _Backend, class_path: Optional[str] = None):
    """Return a class decorator that records an attention backend in BACKEND_MAP.

    The decorated class is returned unchanged; the only effect is that
    ``BACKEND_MAP[backend]`` is set to a qualified-name string. An existing
    entry for ``backend`` is silently replaced. Per the review discussion on
    this change, out-of-tree backends are expected to register by overriding
    an existing ``_Backend`` member rather than by extending the enum.

    Args:
        backend: The ``_Backend`` member to register the class under.
        class_path: Qualified name to store. When omitted (or empty), it is
            derived from the decorated class as ``<module>.<qualname>``.

    Returns:
        A decorator that takes a class, registers it, and returns it.

    Raises:
        ValueError: If ``backend`` is not a ``_Backend`` instance. The check
            runs when this factory is called, before any class is decorated.
    """
    if not isinstance(backend, _Backend):
        raise ValueError(f"{backend} is not a valid _Backend enum value.")

    def _register(cls):
        # An explicit class_path wins; otherwise derive it from the class.
        if class_path:
            qualified_name = class_path
        else:
            qualified_name = f"{cls.__module__}.{cls.__qualname__}"
        BACKEND_MAP[backend] = qualified_name
        return cls

    return _register
|
|
||
|
|
||
def backend_to_class_str(backend: _Backend) -> str:
    """Look up the registered class path for an attention backend.

    Args:
        backend: The backend enum value.

    Returns:
        The qualified-name string stored in ``BACKEND_MAP`` for ``backend``.

    Raises:
        KeyError: If nothing has been registered for ``backend``.
    """
    class_path = BACKEND_MAP[backend]
    return class_path
|
|
||
|
|
||
def backend_to_class(backend: _Backend) -> type:
    """Resolve an attention backend enum value to its class object.

    The class path is fetched with ``backend_to_class_str`` and then handed
    to ``resolve_obj_by_qualname`` to obtain the object it names.

    Args:
        backend: The backend enum value.

    Returns:
        The backend class.
    """
    return resolve_obj_by_qualname(backend_to_class_str(backend))
|
|
||
|
|
||
def backend_name_to_enum(backend_name: str) -> Optional[_Backend]:
    """Map a backend name string onto its ``_Backend`` member, if one exists.

    Args:
        backend_name: Name of a ``_Backend`` member; must not be ``None``.

    Returns:
        The matching ``_Backend`` member when ``backend_name`` is a valid
        in-tree backend name; ``None`` when it is not (an invalid in-tree
        name, or an out-of-tree platform is loaded).
    """
    assert backend_name is not None
    # __members__ is a name -> member mapping, so .get() yields None for
    # names that are not defined on the enum.
    return _Backend.__members__.get(backend_name)
Uh oh!
There was an error while loading. Please reload this page.