We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent e56f44d commit 51e98e4 Copy full SHA for 51e98e4
vllm/benchmarks/latency.py
@@ -82,7 +82,7 @@ def add_cli_args(parser: argparse.ArgumentParser):
82
parser = EngineArgs.add_cli_args(parser)
83
# V1 enables prefix caching by default which skews the latency
84
# numbers. We need to disable prefix caching by default.
85
- parser.set_defaults(enable_prefix_caching=True)
+ parser.set_defaults(enable_prefix_caching=False)
86
87
88
def main(args: argparse.Namespace):
0 commit comments