From 3a6a65e9899d0925dd793a4f9febaf86985c495b Mon Sep 17 00:00:00 2001 From: Yuan Tang Date: Sat, 8 Feb 2025 23:01:34 -0500 Subject: [PATCH 1/7] [Bugfix] Guided decoding falls back to outlines when fails to import xgrammar Signed-off-by: Yuan Tang --- .../guided_decoding/__init__.py | 65 ++++++++++--------- .../guided_decoding/xgrammar_decoding.py | 2 + 2 files changed, 38 insertions(+), 29 deletions(-) diff --git a/vllm/model_executor/guided_decoding/__init__.py b/vllm/model_executor/guided_decoding/__init__.py index cf96461a549f..93e2323d23ee 100644 --- a/vllm/model_executor/guided_decoding/__init__.py +++ b/vllm/model_executor/guided_decoding/__init__.py @@ -40,43 +40,50 @@ def maybe_backend_fallback( guided_params.backend = "outlines" if guided_params.backend == "xgrammar": - # xgrammar only has x86 wheels for linux, fallback to outlines - from vllm.platforms import current_platform - if current_platform.get_cpu_architecture() is not CpuArchEnum.X86: - logger.warning("xgrammar is only supported on x86 CPUs. " + from vllm.model_executor.guided_decoding.xgrammar_decoding import ( # noqa + xgr_installed) + if not xgr_installed: + logger.warning("xgrammar module cannot be imported successfully. " "Falling back to use outlines instead.") guided_params.backend = "outlines" + else: + # xgrammar only has x86 wheels for linux, fallback to outlines + from vllm.platforms import current_platform + if current_platform.get_cpu_architecture() is not CpuArchEnum.X86: + logger.warning("xgrammar is only supported on x86 CPUs. " + "Falling back to use outlines instead.") + guided_params.backend = "outlines" - # xgrammar doesn't support regex or choice, fallback to outlines - if guided_params.regex is not None or guided_params.choice is not None: - logger.warning( - "xgrammar only supports json or grammar guided decoding. 
" - "Falling back to use outlines instead.") - guided_params.backend = "outlines" - - # xgrammar doesn't support some JSON schema features - elif (guided_params.json is not None - and has_xgrammar_unsupported_json_features(guided_params.json)): - logger.warning( - "xgrammar does not support advanced JSON schema features like " - "patterns or numeric ranges. " - "Falling back to use outlines instead.") - guided_params.backend = "outlines" + # xgrammar doesn't support regex or choice, fallback to outlines + if guided_params.regex is not None or guided_params.choice is not None: + logger.warning( + "xgrammar only supports json or grammar guided decoding. " + "Falling back to use outlines instead.") + guided_params.backend = "outlines" - # xgrammar only supports GBNF grammars, so we must convert Lark. - # We must check if the grammar is likely Lark and if that - # grammar is convertible to GBNF - elif (guided_params.grammar is not None - and grammar_is_likely_lark(guided_params.grammar)): - try: - convert_lark_to_gbnf(guided_params.grammar) - except Exception: + # xgrammar doesn't support some JSON schema features + elif (guided_params.json is not None + and has_xgrammar_unsupported_json_features(guided_params.json)): logger.warning( - "xgrammar does not support Lark grammars and the " - "grammar failed to convert to GBNF. " + "xgrammar does not support advanced JSON schema features like " + "patterns or numeric ranges. " "Falling back to use outlines instead.") guided_params.backend = "outlines" + # xgrammar only supports GBNF grammars, so we must convert Lark. + # We must check if the grammar is likely Lark and if that + # grammar is convertible to GBNF + elif (guided_params.grammar is not None + and grammar_is_likely_lark(guided_params.grammar)): + try: + convert_lark_to_gbnf(guided_params.grammar) + except Exception: + logger.warning( + "xgrammar does not support Lark grammars and the " + "grammar failed to convert to GBNF. 
" + "Falling back to use outlines instead.") + guided_params.backend = "outlines" + if (guided_params.backend == "outlines" and guided_params.json_object is not None): # outlines doesn't support json_object, fallback to xgrammar diff --git a/vllm/model_executor/guided_decoding/xgrammar_decoding.py b/vllm/model_executor/guided_decoding/xgrammar_decoding.py index c01bd3af1d5b..fc3a4cd4bebc 100644 --- a/vllm/model_executor/guided_decoding/xgrammar_decoding.py +++ b/vllm/model_executor/guided_decoding/xgrammar_decoding.py @@ -14,7 +14,9 @@ try: import xgrammar as xgr from xgrammar.base import _core as xgr_core + xgr_installed = True except ImportError: + xgr_installed = False pass from vllm.model_executor.guided_decoding.utils import (convert_lark_to_gbnf, From a69201d112707198dbbd05366df8e7fcc1d48837 Mon Sep 17 00:00:00 2001 From: Yuan Tang Date: Sat, 8 Feb 2025 23:05:30 -0500 Subject: [PATCH 2/7] fix line length Signed-off-by: Yuan Tang --- vllm/model_executor/guided_decoding/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/guided_decoding/__init__.py b/vllm/model_executor/guided_decoding/__init__.py index 93e2323d23ee..d31e192ed5e2 100644 --- a/vllm/model_executor/guided_decoding/__init__.py +++ b/vllm/model_executor/guided_decoding/__init__.py @@ -62,8 +62,8 @@ def maybe_backend_fallback( guided_params.backend = "outlines" # xgrammar doesn't support some JSON schema features - elif (guided_params.json is not None - and has_xgrammar_unsupported_json_features(guided_params.json)): + elif (guided_params.json is not None and + has_xgrammar_unsupported_json_features(guided_params.json)): logger.warning( "xgrammar does not support advanced JSON schema features like " "patterns or numeric ranges. 
" From 984b42aad62df5a744289b7806fad51556545c26 Mon Sep 17 00:00:00 2001 From: Yuan Tang Date: Sat, 8 Feb 2025 23:16:27 -0500 Subject: [PATCH 3/7] Fix check Signed-off-by: Yuan Tang --- vllm/model_executor/guided_decoding/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/guided_decoding/__init__.py b/vllm/model_executor/guided_decoding/__init__.py index d31e192ed5e2..53158827c09a 100644 --- a/vllm/model_executor/guided_decoding/__init__.py +++ b/vllm/model_executor/guided_decoding/__init__.py @@ -55,7 +55,7 @@ def maybe_backend_fallback( guided_params.backend = "outlines" # xgrammar doesn't support regex or choice, fallback to outlines - if guided_params.regex is not None or guided_params.choice is not None: + if guided_params.regex is not None or guided_params.choice is not None: # noqa logger.warning( "xgrammar only supports json or grammar guided decoding. " "Falling back to use outlines instead.") @@ -65,7 +65,7 @@ def maybe_backend_fallback( elif (guided_params.json is not None and has_xgrammar_unsupported_json_features(guided_params.json)): logger.warning( - "xgrammar does not support advanced JSON schema features like " + "xgrammar does not support advanced JSON schema features like " # noqa "patterns or numeric ranges. 
" "Falling back to use outlines instead.") guided_params.backend = "outlines" From 9d5b3e83dd342b32057a0831f5d72f68c6d2e245 Mon Sep 17 00:00:00 2001 From: Yuan Tang Date: Mon, 10 Feb 2025 11:49:02 -0500 Subject: [PATCH 4/7] Address comments Signed-off-by: Yuan Tang --- .../guided_decoding/__init__.py | 68 ++++++++++--------- 1 file changed, 35 insertions(+), 33 deletions(-) diff --git a/vllm/model_executor/guided_decoding/__init__.py b/vllm/model_executor/guided_decoding/__init__.py index 53158827c09a..d1ebb9c73737 100644 --- a/vllm/model_executor/guided_decoding/__init__.py +++ b/vllm/model_executor/guided_decoding/__init__.py @@ -42,47 +42,49 @@ def maybe_backend_fallback( if guided_params.backend == "xgrammar": from vllm.model_executor.guided_decoding.xgrammar_decoding import ( # noqa xgr_installed) - if not xgr_installed: - logger.warning("xgrammar module cannot be imported successfully. " + # xgrammar only has x86 wheels for linux, fallback to outlines + from vllm.platforms import current_platform + if current_platform.get_cpu_architecture() is not CpuArchEnum.X86: + logger.warning("xgrammar is only supported on x86 CPUs. " "Falling back to use outlines instead.") guided_params.backend = "outlines" - else: - # xgrammar only has x86 wheels for linux, fallback to outlines - from vllm.platforms import current_platform - if current_platform.get_cpu_architecture() is not CpuArchEnum.X86: - logger.warning("xgrammar is only supported on x86 CPUs. " - "Falling back to use outlines instead.") - guided_params.backend = "outlines" - # xgrammar doesn't support regex or choice, fallback to outlines - if guided_params.regex is not None or guided_params.choice is not None: # noqa - logger.warning( - "xgrammar only supports json or grammar guided decoding. 
" - "Falling back to use outlines instead.") - guided_params.backend = "outlines" + # xgrammar doesn't support regex or choice, fallback to outlines + if guided_params.regex is not None or guided_params.choice is not None: # noqa + logger.warning( + "xgrammar only supports json or grammar guided decoding. " + "Falling back to use outlines instead.") + guided_params.backend = "outlines" - # xgrammar doesn't support some JSON schema features - elif (guided_params.json is not None and - has_xgrammar_unsupported_json_features(guided_params.json)): + # xgrammar doesn't support some JSON schema features + elif (guided_params.json is not None and + has_xgrammar_unsupported_json_features(guided_params.json)): + logger.warning( + "xgrammar does not support advanced JSON schema features like " # noqa + "patterns or numeric ranges. " + "Falling back to use outlines instead.") + guided_params.backend = "outlines" + + # xgrammar only supports GBNF grammars, so we must convert Lark. + # We must check if the grammar is likely Lark and if that + # grammar is convertible to GBNF + elif (guided_params.grammar is not None + and grammar_is_likely_lark(guided_params.grammar)): + try: + convert_lark_to_gbnf(guided_params.grammar) + except Exception: logger.warning( - "xgrammar does not support advanced JSON schema features like " # noqa - "patterns or numeric ranges. " + "xgrammar does not support Lark grammars and the " + "grammar failed to convert to GBNF. " "Falling back to use outlines instead.") guided_params.backend = "outlines" - # xgrammar only supports GBNF grammars, so we must convert Lark. - # We must check if the grammar is likely Lark and if that - # grammar is convertible to GBNF - elif (guided_params.grammar is not None - and grammar_is_likely_lark(guided_params.grammar)): - try: - convert_lark_to_gbnf(guided_params.grammar) - except Exception: - logger.warning( - "xgrammar does not support Lark grammars and the " - "grammar failed to convert to GBNF. 
" - "Falling back to use outlines instead.") - guided_params.backend = "outlines" + # If the xgrammar module cannot be imported successfully for some reason, + # we should still allow users to use guided decoding with a fallback. + elif not xgr_installed: + logger.warning("xgrammar module cannot be imported successfully. " + "Falling back to use outlines instead.") + guided_params.backend = "outlines" if (guided_params.backend == "outlines" and guided_params.json_object is not None): From e94708d9c31c870b35405647396c0769854ac200 Mon Sep 17 00:00:00 2001 From: Yuan Tang Date: Mon, 10 Feb 2025 11:49:59 -0500 Subject: [PATCH 5/7] Remove noqa Signed-off-by: Yuan Tang --- vllm/model_executor/guided_decoding/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vllm/model_executor/guided_decoding/__init__.py b/vllm/model_executor/guided_decoding/__init__.py index d1ebb9c73737..697a0817a2a8 100644 --- a/vllm/model_executor/guided_decoding/__init__.py +++ b/vllm/model_executor/guided_decoding/__init__.py @@ -40,7 +40,7 @@ def maybe_backend_fallback( guided_params.backend = "outlines" if guided_params.backend == "xgrammar": - from vllm.model_executor.guided_decoding.xgrammar_decoding import ( # noqa + from vllm.model_executor.guided_decoding.xgrammar_decoding import ( xgr_installed) # xgrammar only has x86 wheels for linux, fallback to outlines from vllm.platforms import current_platform @@ -50,7 +50,7 @@ def maybe_backend_fallback( guided_params.backend = "outlines" # xgrammar doesn't support regex or choice, fallback to outlines - if guided_params.regex is not None or guided_params.choice is not None: # noqa + if guided_params.regex is not None or guided_params.choice is not None: logger.warning( "xgrammar only supports json or grammar guided decoding. 
" "Falling back to use outlines instead.") @@ -60,7 +60,7 @@ def maybe_backend_fallback( elif (guided_params.json is not None and has_xgrammar_unsupported_json_features(guided_params.json)): logger.warning( - "xgrammar does not support advanced JSON schema features like " # noqa + "xgrammar does not support advanced JSON schema features like " "patterns or numeric ranges. " "Falling back to use outlines instead.") guided_params.backend = "outlines" From cac01104bf012cdce332f19376bc0d741a38c943 Mon Sep 17 00:00:00 2001 From: Yuan Tang Date: Mon, 10 Feb 2025 12:29:34 -0500 Subject: [PATCH 6/7] Fix line length Signed-off-by: Yuan Tang --- vllm/model_executor/guided_decoding/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/guided_decoding/__init__.py b/vllm/model_executor/guided_decoding/__init__.py index 697a0817a2a8..85ed016a978f 100644 --- a/vllm/model_executor/guided_decoding/__init__.py +++ b/vllm/model_executor/guided_decoding/__init__.py @@ -57,8 +57,8 @@ def maybe_backend_fallback( guided_params.backend = "outlines" # xgrammar doesn't support some JSON schema features - elif (guided_params.json is not None and - has_xgrammar_unsupported_json_features(guided_params.json)): + elif (guided_params.json is not None + and has_xgrammar_unsupported_json_features(guided_params.json)): logger.warning( "xgrammar does not support advanced JSON schema features like " "patterns or numeric ranges. 
" From 563cf4a9bbdb88ce93d82bffc29a15b44c1cb94e Mon Sep 17 00:00:00 2001 From: Yuan Tang Date: Mon, 10 Feb 2025 12:42:42 -0500 Subject: [PATCH 7/7] Fix line length again Signed-off-by: Yuan Tang --- vllm/model_executor/guided_decoding/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/guided_decoding/__init__.py b/vllm/model_executor/guided_decoding/__init__.py index 85ed016a978f..3eb7d186eb00 100644 --- a/vllm/model_executor/guided_decoding/__init__.py +++ b/vllm/model_executor/guided_decoding/__init__.py @@ -79,7 +79,7 @@ def maybe_backend_fallback( "Falling back to use outlines instead.") guided_params.backend = "outlines" - # If the xgrammar module cannot be imported successfully for some reason, + # If the xgrammar module cannot be imported successfully, # we should still allow users to use guided decoding with a fallback. elif not xgr_installed: logger.warning("xgrammar module cannot be imported successfully. "