fix: vLLM serving and model mounting #1571
@@ -290,7 +290,28 @@ def exec_model_in_container(self, model_path, cmd_args, args):
        return True

    def setup_mounts(self, model_path, args):
Contributor
issue (code-quality): Low code quality found in Model.setup_mounts - 16%
Explanation: The quality score for this function is below the quality threshold of 25%. This score is a combination of the method length, cognitive complexity and working memory. How can you solve this? It might be worth refactoring this function to make it shorter and more readable.
        if model_path and os.path.exists(model_path):
            if args.runtime == "vllm":
                model_base = ""
                if self.store and hasattr(self, 'model_tag'):
                    ref_file = self.store.get_ref_file(self.model_tag)
                    if ref_file and hasattr(ref_file, 'hash'):
                        model_base = self.store.model_base_directory
                if not model_base:
                    # Might be needed for file:// paths directly used with vLLM.
                    if model_path and os.path.exists(model_path):
                        if os.path.isfile(model_path):
                            model_base = os.path.dirname(model_path)
                        elif os.path.isdir(model_path):
                            model_base = model_path
                if model_base:
                    self.engine.add([f"--mount=type=bind,src={model_base},destination={MNT_DIR},ro"])
                else:
                    raise ValueError(
                        f'Could not determine a valid host directory to mount for model {self.model}. '
                        + 'Ensure the model path is correct or the model store is properly configured.'
                    )
            elif model_path and os.path.exists(model_path):
                if hasattr(self, 'split_model'):
                    self.engine.add([f"--mount=type=bind,src={model_path},destination={MNT_DIR}/{self.mnt_path},ro"])
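For illustration, the mount-source resolution in the hunk above can be read as a standalone helper. The sketch below is not part of the patch: `resolve_vllm_model_base` is a hypothetical name, and only `store.get_ref_file`, `store.model_base_directory`, and the fallback to the model's host path are taken from the diff. It mirrors the order used in `setup_mounts`: prefer the store's base directory when a ref file with a hash exists, otherwise fall back to the directory containing a `file://` model path.

```python
import os


def resolve_vllm_model_base(store, model_tag, model_path):
    """Hypothetical helper sketching the vLLM mount-source resolution.

    Prefers the model store's base directory, then falls back to the host
    path of the model file or directory. Returns "" if nothing is usable.
    """
    # Store-managed models: mount the store's base directory so the
    # snapshots/<hash> layout is visible inside the container.
    if store is not None and model_tag is not None:
        ref_file = store.get_ref_file(model_tag)
        if ref_file is not None and getattr(ref_file, "hash", None):
            return store.model_base_directory

    # file:// paths used directly with vLLM: mount the containing directory.
    if model_path and os.path.exists(model_path):
        if os.path.isfile(model_path):
            return os.path.dirname(model_path)
        if os.path.isdir(model_path):
            return model_path

    # Nothing usable; the caller raises ValueError in this case.
    return ""
```

With a helper shaped like this, `setup_mounts` would reduce to a single call plus the `--mount`/`ValueError` branch, which is roughly the kind of refactor the code-quality comment above is asking for.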
@@ -531,9 +552,38 @@ def build_exec_args_serve(self, args, exec_model_path, chat_template_path="", mm
    def handle_runtime(self, args, exec_args, exec_model_path):
        set_accel_env_vars()
        if args.runtime == "vllm":
            exec_model_path = os.path.dirname(exec_model_path)
            # Left out "vllm", "serve" the image entrypoint already starts it
            exec_args = ["--port", args.port, "--model", MNT_FILE, "--max_model_len", "2048"]
            container_model_path = ""
Contributor
issue (code-quality): We've found these issues:
            ref_file = None
            if self.store:
                ref_file = self.store.get_ref_file(self.model_tag)

            if ref_file and ref_file.hash:
                snapshot_dir_name = ref_file.hash
                container_model_path = os.path.join(MNT_DIR, "snapshots", snapshot_dir_name)
            else:
                current_model_host_path = self.get_model_path(args)
                if os.path.isdir(current_model_host_path):
                    container_model_path = MNT_DIR
                else:
                    container_model_path = os.path.join(MNT_DIR, os.path.basename(current_model_host_path))

            vllm_max_model_len = 2048
            if args.vllm_max_model_len:
                vllm_max_model_len = args.vllm_max_model_len
            exec_args = [
                "--port",
                str(args.port),
                "--model",
                str(container_model_path),
                "--max_model_len",
                str(vllm_max_model_len),
                "--served-model-name",
                self.model_name,
            ]
            if hasattr(args, 'runtime_args') and args.runtime_args:
                exec_args.extend(args.runtime_args)
        else:
            gpu_args = self.gpu_args(args=args)
            if gpu_args is not None:
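To make the effect of the new `handle_runtime` branch concrete, here is a hedged sketch of the arguments it produces for a store-managed model. The hash, port, and served model name are made-up example values, and `MNT_DIR` is assumed to be the in-container mount point that the bind mount from `setup_mounts` targets; the real value comes from ramalama's constants.

```python
import os

# Assumed in-container mount point (illustrative; real value comes from ramalama).
MNT_DIR = "/mnt/models"

# Example values for a store-managed model (all hypothetical).
ref_hash = "sha256-0123abcd"          # stands in for ref_file.hash
port = 8080
served_model_name = "granite:latest"  # stands in for self.model_name
vllm_max_model_len = 2048             # default unless --vllm-max-model-len is given

# The store case mounts the model base directory, so the model lives under
# MNT_DIR/snapshots/<hash> inside the container.
container_model_path = os.path.join(MNT_DIR, "snapshots", ref_hash)

exec_args = [
    "--port", str(port),
    "--model", container_model_path,
    "--max_model_len", str(vllm_max_model_len),
    "--served-model-name", served_model_name,
]
print(exec_args)
# ['--port', '8080', '--model', '/mnt/models/snapshots/sha256-0123abcd',
#  '--max_model_len', '2048', '--served-model-name', 'granite:latest']
```

Because the image entrypoint already runs `vllm serve`, these arguments are appended directly rather than being prefixed with a `vllm serve` command, matching the comment carried over from the old code.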