Skip to content

Commit

Permalink
chore(qol): update CLI options and performance upgrade for build cache (#997)

Browse files Browse the repository at this point in the history

* chore(qol): update CLI options and performance upgrade for build cache

Signed-off-by: paperspace <[email protected]>

* chore: update default python version for dev

Signed-off-by: paperspace <[email protected]>

* fix: install custom tar.gz models

Signed-off-by: paperspace <[email protected]>

---------

Signed-off-by: paperspace <[email protected]>
  • Loading branch information
aarnphm authored May 26, 2024
1 parent bc0be03 commit 3f048d8
Show file tree
Hide file tree
Showing 14 changed files with 274 additions and 203 deletions.
2 changes: 1 addition & 1 deletion .python-version-default
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.9
3.11
42 changes: 6 additions & 36 deletions openllm-core/src/openllm_core/_schemas.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

import pydantic, inflection, orjson, typing as t
import pydantic, orjson, typing as t
from ._configuration import LLMConfig
from .utils import gen_random_uuid
from ._typing_compat import Required, TypedDict, LiteralString
Expand Down Expand Up @@ -47,14 +47,10 @@ class GenerationInput(pydantic.BaseModel):
request_id: t.Optional[str] = pydantic.Field(default=None)
adapter_name: t.Optional[str] = pydantic.Field(default=None)

_class_ref: t.ClassVar[type[LLMConfig]] = pydantic.PrivateAttr()

@pydantic.field_validator('llm_config')
@classmethod
def llm_config_validator(cls, v: LLMConfig | dict[str, t.Any]) -> LLMConfig:
if isinstance(v, dict):
return cls._class_ref.model_construct_env(**v)
return v
def __init__(self, *, _internal=False, **data: t.Any):
if not _internal:
raise RuntimeError('This class is not meant to be used directly. Use "from_config" instead')
super().__init__(**data)

@pydantic.field_validator('stop')
@classmethod
Expand All @@ -81,35 +77,9 @@ def ser_model(self) -> dict[str, t.Any]:
flattened['stop_token_ids'] = self.stop_token_ids
return flattened

def __init__(self, /, *, _internal: bool = False, **data: t.Any) -> None:
if not _internal:
raise RuntimeError(
f'Cannot instantiate GenerationInput directly. Use "{self.__class__.__qualname__}.from_dict" instead.'
)
super().__init__(**data)

@classmethod
def from_dict(cls, structured: GenerationInputDict) -> GenerationInput:
if not hasattr(cls, '_class_ref'):
raise ValueError(
'Cannot use "from_dict" from a raw GenerationInput class. Currently only supports class created from "from_config".'
)
filtered: dict[str, t.Any] = {k: v for k, v in structured.items() if v is not None}
llm_config: dict[str, t.Any] | None = filtered.pop('llm_config', None)
if llm_config is not None:
filtered['llm_config'] = cls._class_ref.model_construct_env(**llm_config)

return cls(_internal=True, **filtered)

@classmethod
def from_config(cls, llm_config: LLMConfig) -> type[GenerationInput]:
klass = pydantic.create_model(
inflection.camelize(llm_config['start_name']) + 'GenerationInput',
__base__=cls,
llm_config=(type(llm_config), llm_config),
_class_ref=(llm_config.__class__, pydantic.PrivateAttr(default=llm_config.__class__)),
)
return klass
return cls(_internal=True, llm_config=llm_config)


# NOTE: parameters from vllm.RequestOutput and vllm.CompletionOutput since vllm is not available on CPU.
Expand Down
13 changes: 13 additions & 0 deletions openllm-core/src/openllm_core/config/configuration_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,3 +218,16 @@ def from_bentomodel(cls, bentomodel: Model, **attrs: t.Any) -> openllm_core.LLMC
raise ValueError(
f"Failed to determine config class for '{bentomodel.name}'. Make sure {bentomodel.name} is saved with openllm."
)

@classmethod
def from_id(cls, model_id: str, *, trust_remote_code: bool = False, **attrs: t.Any) -> openllm_core.LLMConfig:
    """Resolve the matching ``LLMConfig`` for a HuggingFace model id.

    Downloads/loads the model's ``AutoConfig`` and maps its declared
    architectures onto OpenLLM's supported config classes.

    Args:
      model_id: HuggingFace Hub id or local path of the model.
      trust_remote_code: forwarded to ``AutoConfig.from_pretrained``; required
        for models that ship custom configuration code.
      **attrs: extra fields forwarded to ``model_construct_env`` on the
        resolved config class.

    Returns:
      An ``LLMConfig`` instance for the first supported architecture found.

    Raises:
      RuntimeError: if none of the model's architectures are supported
        (including the case where the config declares no architectures at all).
    """
    # Imported lazily: transformers is heavy and only needed on this path.
    import transformers

    config = transformers.AutoConfig.from_pretrained(model_id, trust_remote_code=trust_remote_code)
    # ``architectures`` may be None for some configs; guard so we raise the
    # informative RuntimeError below instead of an opaque TypeError.
    for arch in config.architectures or []:
        if arch in cls._architecture_mappings:
            return cls.for_model(cls._architecture_mappings[arch]).model_construct_env(**attrs)
    raise RuntimeError(
        f'Failed to determine config class for {model_id}. Got {config.architectures}, which is not yet supported (Supported: {list(cls._architecture_mappings.keys())})'
    )
1 change: 1 addition & 0 deletions openllm-core/src/openllm_core/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
DEBUG as DEBUG,
SHOW_CODEGEN as SHOW_CODEGEN,
MYPY as MYPY,
OPENLLM_DEV_BUILD as OPENLLM_DEV_BUILD,
)

if t.TYPE_CHECKING:
Expand Down
1 change: 1 addition & 0 deletions openllm-core/src/openllm_core/utils/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ DEBUG_ENV_VAR: str = ...
QUIET_ENV_VAR: str = ...
DEV_DEBUG_VAR: str = ...
WARNING_ENV_VAR: str = ...
OPENLLM_DEV_BUILD: str = ...

_T = TypeVar('_T')
R = TypeVar('R')
Expand Down
1 change: 1 addition & 0 deletions openllm-core/src/openllm_core/utils/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
DEV_DEBUG_VAR = 'DEBUG'

ENV_VARS_TRUE_VALUES = {'1', 'ON', 'YES', 'TRUE'}
OPENLLM_DEV_BUILD = 'OPENLLM_DEV_BUILD'


def check_bool_env(env: str, default: bool = True):
Expand Down
4 changes: 2 additions & 2 deletions openllm-python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ classifiers = [
"Programming Language :: Python :: Implementation :: PyPy",
]
dependencies = [
"bentoml[io]>=1.2",
"bentoml[io]>=1.2.16",
"transformers[torch,tokenizers]>=4.36.0",
"openllm-client>=0.5.0-alpha.14",
"openllm-core>=0.5.0-alpha.14",
Expand Down Expand Up @@ -112,7 +112,7 @@ gemma = ["xformers"]
ggml = ["ctransformers"]
gpt-neox = ["xformers"]
gptq = ["auto-gptq[triton]>=0.4.2"]
grpc = ["bentoml[grpc]>=1.2"]
grpc = ["bentoml[grpc]>=1.2.16"]
llama = ["xformers"]
mistral = ["xformers"]
mixtral = ["xformers"]
Expand Down
Loading

0 comments on commit 3f048d8

Please sign in to comment.