Skip to content

Commit fa831d8

Browse files
kouroshHakha and elliot-barn
authored and committed
[serve][llm][refactor] Align Ray Serve LLM Code Structure with Architectural Design (#57889)
Signed-off-by: Kourosh Hakhamaneshi <[email protected]> Signed-off-by: elliot-barn <[email protected]>
1 parent 6836383 commit fa831d8

File tree

76 files changed

+173
-166
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

76 files changed

+173
-166
lines changed

python/ray/llm/_internal/common/callbacks/base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
if TYPE_CHECKING:
88
from ray.llm._internal.common.utils.download_utils import NodeModelDownloadable
9-
from ray.llm._internal.serve.configs.server_models import LLMConfig
9+
from ray.llm._internal.serve.core.configs.llm_config import LLMConfig
1010

1111
logger = logging.getLogger(__name__)
1212

python/ray/llm/_internal/common/callbacks/cloud_downloader.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ class CloudDownloader(CallbackBase):
3939
Example:
4040
```
4141
from ray.llm._internal.common.callbacks.cloud_downloader import CloudDownloader
42-
from ray.llm._internal.serve.configs.server_models import LLMConfig
42+
from ray.llm._internal.serve.core.configs.llm_config import LLMConfig
4343
config = LLMConfig(
4444
...
4545
callback_config={

python/ray/llm/_internal/serve/config_generator/utils/constants.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import os
22

3-
from ray.llm._internal.serve.configs.constants import RAYLLM_HOME_DIR
3+
from ray.llm._internal.serve.constants import RAYLLM_HOME_DIR
44

55
TEMPLATE_DIR = os.path.normpath(
66
os.path.join(

python/ray/llm/_internal/serve/config_generator/utils/gpu.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
DEFAULT_DEPLOYMENT_CONFIGS_FILE,
88
TEMPLATE_DIR,
99
)
10-
from ray.llm._internal.serve.configs.server_models import GPUType
10+
from ray.llm._internal.serve.core.configs.llm_config import GPUType
1111

1212
# All practical GPUs
1313
ALL_GPU_TYPES = [

python/ray/llm/_internal/serve/config_generator/utils/text_completion.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from ray.llm._internal.serve.config_generator.utils.models import (
1616
TextCompletionModelConfig,
1717
)
18-
from ray.llm._internal.serve.configs.server_models import LLMConfig
18+
from ray.llm._internal.serve.core.configs.llm_config import LLMConfig
1919

2020

2121
def get_model_default_config(model_id: str) -> Dict[str, Any]:

python/ray/llm/_internal/serve/configs/server_models.py renamed to python/ray/llm/_internal/serve/core/configs/llm_config.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,12 @@
2828
)
2929
from ray.llm._internal.common.utils.download_utils import NodeModelDownloadable
3030
from ray.llm._internal.common.utils.import_utils import load_class, try_import
31-
from ray.llm._internal.serve.configs.constants import (
31+
from ray.llm._internal.serve.constants import (
3232
DEFAULT_MULTIPLEX_DOWNLOAD_TIMEOUT_S,
3333
DEFAULT_MULTIPLEX_DOWNLOAD_TRIES,
3434
MODEL_RESPONSE_BATCH_TIMEOUT_MS,
3535
)
36-
from ray.llm._internal.serve.deployments.llm.vllm.kv_transfer_backends import (
36+
from ray.llm._internal.serve.engines.vllm.kv_transfer import (
3737
SUPPORTED_BACKENDS as SUPPORTED_KV_CONNECTOR_BACKENDS,
3838
)
3939
from ray.llm._internal.serve.observability.logging import get_logger
@@ -455,7 +455,7 @@ def get_engine_config(self) -> EngineConfigType:
455455
return self._engine_config
456456

457457
if self.llm_engine == LLMEngine.vLLM:
458-
from ray.llm._internal.serve.deployments.llm.vllm.vllm_models import (
458+
from ray.llm._internal.serve.engines.vllm.vllm_models import (
459459
VLLMEngineConfig,
460460
)
461461

python/ray/llm/_internal/serve/configs/openai_api_models.py renamed to python/ray/llm/_internal/serve/core/configs/openai_api_models.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
from vllm.utils import random_uuid
3030

3131
if TYPE_CHECKING:
32-
from ray.llm._internal.serve.configs.server_models import LLMConfig
32+
from ray.llm._internal.serve.core.configs.llm_config import LLMConfig
3333

3434

3535
class ChatCompletionRequest(vLLMChatCompletionRequest):
@@ -177,6 +177,9 @@ def to_model_metadata(
177177
model_config: The model's YAML config.
178178
overrides: should only be set for LoRA fine-tuned models. The
179179
overrides of the fine-tuned model metadata.
180+
181+
Returns:
182+
A ModelCard object.
180183
"""
181184
metadata = {
182185
"model_id": model_config.model_id,

0 commit comments

Comments (0)