-
-
Notifications
You must be signed in to change notification settings - Fork 15.3k
Do not allow disabling chunked prefill for generation models #28833
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -1353,26 +1353,6 @@ def create_engine_config( | |||||||||
|
|
||||||||||
| # Set default arguments for V1 Engine. | ||||||||||
| self._set_default_args(usage_context, model_config) | ||||||||||
| # Disable chunked prefill and prefix caching for: | ||||||||||
| # POWER (ppc64le)/ARM/s390x/RISCV CPUs in V1 | ||||||||||
| if current_platform.is_cpu() and current_platform.get_cpu_architecture() in ( | ||||||||||
| CpuArchEnum.POWERPC, | ||||||||||
| CpuArchEnum.S390X, | ||||||||||
| CpuArchEnum.ARM, | ||||||||||
| CpuArchEnum.RISCV, | ||||||||||
| ): | ||||||||||
| logger.info( | ||||||||||
| "Chunked prefill is not supported for ARM and POWER, " | ||||||||||
| "S390X and RISC-V CPUs; " | ||||||||||
| "disabling it for V1 backend." | ||||||||||
| ) | ||||||||||
| self.enable_chunked_prefill = False | ||||||||||
| logger.info( | ||||||||||
| "Prefix caching is not supported for ARM and POWER, " | ||||||||||
| "S390X and RISC-V CPUs; " | ||||||||||
| "disabling it for V1 backend." | ||||||||||
| ) | ||||||||||
| self.enable_prefix_caching = False | ||||||||||
|
|
||||||||||
| assert self.enable_chunked_prefill is not None | ||||||||||
|
|
||||||||||
|
|
@@ -1935,46 +1915,99 @@ def _set_default_args( | |||||||||
| self, usage_context: UsageContext, model_config: ModelConfig | ||||||||||
| ) -> None: | ||||||||||
| """Set Default Arguments for V1 Engine.""" | ||||||||||
| # Check if running on CPU architecture with feature restrictions | ||||||||||
| is_restricted_cpu = ( | ||||||||||
| current_platform.is_cpu() | ||||||||||
| and current_platform.get_cpu_architecture() | ||||||||||
| in ( | ||||||||||
| CpuArchEnum.POWERPC, | ||||||||||
| CpuArchEnum.S390X, | ||||||||||
| CpuArchEnum.ARM, | ||||||||||
| CpuArchEnum.RISCV, | ||||||||||
| ) | ||||||||||
| ) | ||||||||||
| restricted_cpu_names = "ARM, POWER, S390X, and RISC-V CPUs" | ||||||||||
|
|
||||||||||
| # Validate platform-specific restrictions | ||||||||||
| if is_restricted_cpu: | ||||||||||
| if self.enable_chunked_prefill is True: | ||||||||||
| raise ValueError( | ||||||||||
| f"Chunked prefill is not supported for {restricted_cpu_names}." | ||||||||||
| ) | ||||||||||
| if self.enable_prefix_caching is True: | ||||||||||
| raise ValueError( | ||||||||||
| f"Prefix caching is not supported for {restricted_cpu_names}." | ||||||||||
| ) | ||||||||||
|
|
||||||||||
| # Validate model-specific requirements | ||||||||||
| # (except on restricted CPUs where chunked prefill must be disabled) | ||||||||||
| if ( | ||||||||||
| not is_restricted_cpu | ||||||||||
| and model_config.runner_type == "generate" | ||||||||||
| and self.enable_chunked_prefill is False | ||||||||||
| ): | ||||||||||
| raise ValueError("Chunked prefill is required for generation models. ") | ||||||||||
|
||||||||||
| raise ValueError("Chunked prefill is required for generation models. ") | |
| raise ValueError("Chunked prefill is required for generation models.") |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would suggest changing the message to "Chunked prefill cannot be disabled for generation models."
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
shouldn't mention V1 anymore ...
| "Chunked prefill is not supported for %s; " | |
| "disabling it for V1 backend.", | |
| "Chunked prefill is not supported for %s " | |
| "and will be disabled.", |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| "Prefix caching is not supported for %s; " | |
| "disabling it for V1 backend.", | |
| "Prefix caching is not supported for %s; " | |
| "and will be disabled.", |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why not check these after applying defaults?