
Commit 1e75bff

Updated decorator to support unspecified default (#2026)
## 📌 Description

Updated the decorator to support an unspecified default. This was causing issues when calling mm_fp4 without a backend specified. Also added SM 110 as a supported compute capability for the cutlass backend (mm_fp4).

## 🔍 Related Issues

## 🚀 Pull Request Checklist

Thank you for contributing to FlashInfer! Before we review your pull request, please make sure the following items are complete.

### ✅ Pre-commit Checks

- [ ] I have installed `pre-commit` by running `pip install pre-commit` (or used your preferred method).
- [x] I have installed the hooks with `pre-commit install`.
- [ ] I have run the hooks manually with `pre-commit run --all-files` and fixed any reported issues.

> If you are unsure about how to set up `pre-commit`, see [the pre-commit documentation](https://pre-commit.com/).

## 🧪 Tests

- [ ] Tests have been added or updated as needed.
- [x] All tests are passing (`unittest`, etc.).

## Reviewer Notes

## Summary by CodeRabbit

* **New Features**
  * FP4 Cutlass GEMM now supports the SM110 GPU compute capability.
* **Bug Fixes**
  * Kernels called without an explicit backend now consistently use the default backend.
* **Tests**
  * Added a unit test to verify default backend selection and correct results when the backend is omitted.
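To see the failure mode concretely, here is a minimal sketch (naive_requirement and my_kernel are hypothetical stand-ins, not FlashInfer code): a wrapper that reads `backend` out of `**kwargs` never sees the declared default, because Python only populates `kwargs` with arguments the caller passed explicitly.

# Minimal sketch of the bug; names are illustrative, not FlashInfer's code.
import functools

def naive_requirement(func):
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Only explicitly passed arguments appear in kwargs, so the
        # declared default backend="cudnn" is invisible here.
        backend = kwargs.get("backend")
        if backend is None:
            raise ValueError(f"{func.__name__}: backend not recognized")
        return func(*args, **kwargs)
    return wrapper

@naive_requirement
def my_kernel(x, backend="cudnn"):
    return x * 2

my_kernel(3, backend="cudnn")  # ok
my_kernel(3)                   # raises ValueError, despite the default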
1 parent da01b1b commit 1e75bff

File tree

3 files changed (+62, -11 lines)


flashinfer/gemm.py

Lines changed: 1 addition & 1 deletion
@@ -1834,7 +1834,7 @@ def _trtllm_gemm_fp4_requirement(
     return True


-@supported_compute_capability([100, 103, 120, 121])
+@supported_compute_capability([100, 103, 110, 120, 121])
 def _cutlass_gemm_fp4_requirement(
     a: torch.Tensor,
     b: torch.Tensor,
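For context, the following rough sketch shows what a capability-gating decorator like supported_compute_capability plausibly does; this is an assumption for illustration, not FlashInfer's actual implementation. The allowed list encodes compute capability as major * 10 + minor, so adding 110 enables SM 110.

# Illustrative sketch only (assumed behavior): gate a requirement
# check on the current device's compute capability.
import functools
import torch

def supported_compute_capability_sketch(allowed_ccs):
    def decorator(check):
        @functools.wraps(check)
        def wrapper(*args, **kwargs):
            major, minor = torch.cuda.get_device_capability()
            if major * 10 + minor not in allowed_ccs:
                return False  # e.g. SM 110 passes only if 110 is listed
            return check(*args, **kwargs)
        return wrapper
    return decorator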

flashinfer/utils.py

Lines changed: 25 additions & 10 deletions
@@ -23,6 +23,7 @@
 import torch.version
 from torch.torch_version import TorchVersion
 from torch.torch_version import __version__ as torch_version
+import inspect

 from .jit.spdlog import gen_spdlog_module

@@ -950,6 +951,9 @@ def backend_requirement(
     """

     def decorator(func):
+        # Get the function signature once for reuse
+        sig = inspect.signature(func)
+
         def is_backend_supported(backend, cc=None):
             # Is this backend present?
             if backend not in backend_checks:
@@ -971,7 +975,9 @@ def is_compute_capability_supported(cc):
                 for checker in backend_checks.values()
             )

-        def is_problem_size_supported(*args, **kwargs):
+        # @note: this function does not automatically apply defaults to the arguments.
+        def _is_problem_size_supported(*args, **kwargs):
+            # At this point, kwargs should have defaults applied, so backend should be present
             backend = kwargs.get("backend")
             if backend not in backend_checks:
                 raise BackendSupportedError(
@@ -983,26 +989,34 @@ def _is_problem_size_supported(*args, **kwargs):
             else:
                 return req_checker(*args, **kwargs)

+        # @brief: Wrapper function that calls the original, decorated function, after applying a number of checks.
+        # @note: here we manually apply defaults to the arguments in the wrapper function when doing validation.
         @functools.wraps(func)
         def wrapper(*args, **kwargs):
-            backend = kwargs.get("backend")
             # skip_check is an optional argument that the decorator adds to any API function.
             # It prevents the performance overhead of checking.
             skip_check = kwargs.pop("skip_check", False)

             if not skip_check:
+                # Apply defaults from the function signature for validation.
+                # This ensures that all parameters (including backend) have their default values
+                # if not explicitly provided by the caller.
+                bound_args = sig.bind(*args, **kwargs)
+                bound_args.apply_defaults()
+                # Convert to kwargs for validation functions
+                kwargs_with_defaults = dict(bound_args.arguments)
+
+                backend = kwargs_with_defaults.get("backend")
+
                 capability = None
                 # Find the first tensor argument.
                 # Assume all tensors are on the same device/capability.
                 # We could consider checking all tensors at a performance cost.
                 tensor_arg = None
-                for arg in args:
-                    if isinstance(arg, torch.Tensor):
-                        tensor_arg = arg
-                if tensor_arg is None:
-                    for value in kwargs.values():
-                        if isinstance(value, torch.Tensor):
-                            tensor_arg = value
+                for value in kwargs_with_defaults.values():
+                    if isinstance(value, torch.Tensor):
+                        tensor_arg = value
+                        break

                 if tensor_arg is not None:
                     # Get compute capability from the first tensor
@@ -1015,10 +1029,11 @@ def wrapper(*args, **kwargs):
                     raise BackendSupportedError(
                         f"{func.__name__} does not support backend '{backend}'{extra}"
                     )
-                if not is_problem_size_supported(*args, **kwargs):
+                if not _is_problem_size_supported(**kwargs_with_defaults):
                     raise ValueError(
                         f"Problem size is not supported for {func.__name__}"
                     )
+
             return func(*args, **kwargs)

         wrapper.is_backend_supported = is_backend_supported
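The fix rests on standard-library behavior that can be shown standalone: inspect.signature(...).bind(...) followed by apply_defaults() fills in every declared default the caller omitted, so validation always sees a concrete backend value. (my_kernel below is a placeholder, not FlashInfer code.)

# Stdlib mechanism the fix builds on; my_kernel is a placeholder.
import inspect

def my_kernel(x, backend="cudnn"):
    return x * 2

sig = inspect.signature(my_kernel)
bound = sig.bind("input")     # caller omits backend
bound.apply_defaults()        # fills in backend="cudnn" from the signature
print(dict(bound.arguments))  # {'x': 'input', 'backend': 'cudnn'}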

tests/utils/test_decorators.py

Lines changed: 36 additions & 0 deletions
@@ -210,3 +210,39 @@ def my_documented_function(x, backend="backend"):
     # Verify that added methods still exist
     assert hasattr(my_documented_function, "is_backend_supported")
     assert hasattr(my_documented_function, "is_compute_capability_supported")
+
+
+def test_backend_default_parameter():
+    """Test that backend_requirement correctly uses the default backend parameter when not specified."""
+    if not torch.cuda.is_available():
+        pytest.skip("Skipping CUDA tests (no GPU available)")
+
+    # Get actual device capability
+    x = torch.randn(1, 1, device="cuda")
+    major, minor = torch.cuda.get_device_capability(x.device)
+    actual_capability = major * 10 + minor
+
+    @supported_compute_capability([80, 86, 89, 90, actual_capability])
+    def _cutlass_check(x, backend):
+        return x.shape[0] > 0
+
+    @supported_compute_capability([75, 80, 86, 89, 90, actual_capability])
+    def _cudnn_check(x, backend):
+        return x.shape[0] > 0
+
+    @backend_requirement({"cutlass": _cutlass_check, "cudnn": _cudnn_check})
+    def my_kernel(x, backend="cudnn"):
+        return x * 2
+
+    x = torch.randn(10, 10, device="cuda")
+
+    # Test that calling without backend argument uses the default "cudnn".
+    # This should work without raising an error.
+    result = my_kernel(x)
+    assert result.shape == x.shape
+    assert torch.allclose(result, x * 2)
+
+    # Test that explicitly passing a different backend also works
+    result2 = my_kernel(x, backend="cutlass")
+    assert result2.shape == x.shape
+    assert torch.allclose(result2, x * 2)