diff --git a/CHANGELOG.md b/CHANGELOG.md index 932550ef..a3bebdf4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +## [[11.0.7]](https://github.com/Clarifai/clarifai-python/releases/tag/11.0.7) - [PyPI](https://pypi.org/project/clarifai/11.0.7/) - 2025-01-24 + +### Changed + + - Updated model upload experience [(#498)](https://github.com/Clarifai/clarifai-python/pull/498) + ## [[11.0.6]](https://github.com/Clarifai/clarifai-python/releases/tag/11.0.6) - [PyPI](https://pypi.org/project/clarifai/11.0.6/) - 2025-01-24 ### Changed diff --git a/clarifai/__init__.py b/clarifai/__init__.py index 378690c1..5e49b53e 100644 --- a/clarifai/__init__.py +++ b/clarifai/__init__.py @@ -1 +1 @@ -__version__ = "11.0.6" +__version__ = "11.0.7" diff --git a/clarifai/cli/__main__.py b/clarifai/cli/__main__.py new file mode 100644 index 00000000..24c1e3d9 --- /dev/null +++ b/clarifai/cli/__main__.py @@ -0,0 +1,4 @@ +from clarifai.cli.base import main + +if __name__ == "__main__": + main() diff --git a/clarifai/cli/base.py b/clarifai/cli/base.py index 38529a24..ccb550b5 100644 --- a/clarifai/cli/base.py +++ b/clarifai/cli/base.py @@ -108,5 +108,6 @@ def login(ctx, config, env, user_id): # Import the CLI commands to register them load_command_modules() -if __name__ == '__main__': + +def main(): cli() diff --git a/clarifai/cli/model.py b/clarifai/cli/model.py index dbccd571..ec618174 100644 --- a/clarifai/cli/model.py +++ b/clarifai/cli/model.py @@ -5,8 +5,7 @@ @cli.group(['model']) def model(): - """Manage models: upload, test locally, run_locally, predict""" - pass + """Manage models: upload, test locally, run locally, predict, and more""" @model.command() @@ -34,6 +33,28 @@ def upload(model_path, download_checkpoints, skip_dockerfile): model_upload.main(model_path, download_checkpoints, skip_dockerfile) +@model.command() +@click.option( + '--model_path', + type=click.Path(exists=True), + required=True, + help='Path to the model directory.') +@click.option( + '--out_path', 
+ type=click.Path(exists=False), + required=False, + default=None, + help= + 'Optional path to write the checkpoints to. This will place them in {out_path}/ If not provided it will default to {model_path}/1/checkpoints where the config.yaml is read.' +) +def download_checkpoints(model_path, out_path): + """Download checkpoints from external source to local model_path""" + + from clarifai.runners.models.model_upload import ModelUploader + uploader = ModelUploader(model_path, download_validation_only=True) + uploader.download_checkpoints(out_path) + + @model.command() @click.option( '--model_path', diff --git a/clarifai/runners/dockerfile_template/Dockerfile.template b/clarifai/runners/dockerfile_template/Dockerfile.template index 3c0e5736..c0625e7c 100644 --- a/clarifai/runners/dockerfile_template/Dockerfile.template +++ b/clarifai/runners/dockerfile_template/Dockerfile.template @@ -1,43 +1,82 @@ -FROM --platform=$TARGETPLATFORM ${BASE_IMAGE} as build +# syntax=docker/dockerfile:1 +############################# +# User specific requirements installed in the pip_packages +############################# +FROM --platform=$TARGETPLATFORM ${BUILDER_IMAGE} as pip_packages -ENV DEBIAN_FRONTEND=noninteractive +COPY --link requirements.txt /home/nonroot/requirements.txt + +# Update clarifai package so we always have latest protocol to the API. Everything should land in /venv +RUN pip install --no-cache-dir -r /home/nonroot/requirements.txt && \ + (pip install --upgrade --upgrade-strategy only-if-needed --no-deps --no-cache-dir clarifai clarifai-grpc clarifai-protocol || true) +############################# ############################# -# User specific requirements +# Downloader dependencies image ############################# -COPY requirements.txt . +FROM --platform=$TARGETPLATFORM ${DOWNLOADER_IMAGE} as downloader -# Install requirements and clarifai package and cleanup before leaving this line. 
-# Note(zeiler): this could be in a future template as {{model_python_deps}} -RUN pip install --no-cache-dir -r requirements.txt && \ - pip install --no-cache-dir clarifai +# make sure we have the latest clarifai package. +RUN (pip install --upgrade --upgrade-strategy only-if-needed --no-cache-dir clarifai clarifai-grpc clarifai-protocol || true) +##### -# These will be set by the templaing system. -ENV CLARIFAI_PAT=${CLARIFAI_PAT} -ENV CLARIFAI_USER_ID=${CLARIFAI_USER_ID} -ENV CLARIFAI_RUNNER_ID=${CLARIFAI_RUNNER_ID} -ENV CLARIFAI_NODEPOOL_ID=${CLARIFAI_NODEPOOL_ID} -ENV CLARIFAI_COMPUTE_CLUSTER_ID=${CLARIFAI_COMPUTE_CLUSTER_ID} -ENV CLARIFAI_API_BASE=${CLARIFAI_API_BASE} + +############################# +# Final runtime image +############################# +FROM --platform=$TARGETPLATFORM ${RUNTIME_IMAGE} as final # Set the NUMBA cache dir to /tmp -ENV NUMBA_CACHE_DIR=/tmp/numba_cache # Set the TORCHINDUCTOR cache dir to /tmp -ENV TORCHINDUCTOR_CACHE_DIR=/tmp/torchinductor_cache -ENV HOME=/tmp +# The CLARIFAI* will be set by the templating system. +ENV NUMBA_CACHE_DIR=/tmp/numba_cache \ + TORCHINDUCTOR_CACHE_DIR=/tmp/torchinductor_cache \ + HOME=/tmp \ + DEBIAN_FRONTEND=noninteractive -# Set the working directory to /app -WORKDIR /app +##### +# Copy the python requirements needed to download checkpoints +##### +COPY --link=true --from=downloader /venv /venv +##### -# Copy the current folder into /app/model_dir that the SDK will expect. -# Note(zeiler): would be nice to exclude checkpoints in case they were pre-downloaded. -COPY . /app/model_dir/${name} +##### +# Copy the files needed to download +##### +# This creates the directory that HF downloader will populate and with nonroot:nonroot permissions up. +COPY --chown=nonroot:nonroot downloader/unused.yaml /home/nonroot/main/1/checkpoints/.cache/unused.yaml -# Add the model directory to the python path. 
-ENV PYTHONPATH=${PYTHONPATH}:/app/model_dir/${name} +##### +# Download checkpoints +COPY --link=true config.yaml /home/nonroot/main/ +RUN ["python", "-m", "clarifai.cli", "model", "download-checkpoints", "--model_path", "/home/nonroot/main", "--out_path", "/home/nonroot/main"] +##### -ENTRYPOINT ["python", "-m", "clarifai.runners.server"] + +##### +# Copy the python packages from the previous stage. +COPY --link=true --from=pip_packages /venv /venv +##### + +# Copy in the actual files like config.yaml, requirements.txt, and most importantly 1/model.py +# for the actual model. +# If checkpoints aren't downloaded since a checkpoints: block is not provided, then they will +# be in the build context and copied here as well. +COPY --link=true 1/model.py /home/nonroot/main/1/model.py +# At this point we only need these for validation in the SDK. +COPY --link=true requirements.txt config.yaml /home/nonroot/main/ + +# Add the model directory to the python path. +ENV PYTHONPATH=${PYTHONPATH}:/home/nonroot/main \ + CLARIFAI_PAT=${CLARIFAI_PAT} \ + CLARIFAI_USER_ID=${CLARIFAI_USER_ID} \ + CLARIFAI_RUNNER_ID=${CLARIFAI_RUNNER_ID} \ + CLARIFAI_NODEPOOL_ID=${CLARIFAI_NODEPOOL_ID} \ + CLARIFAI_COMPUTE_CLUSTER_ID=${CLARIFAI_COMPUTE_CLUSTER_ID} \ + CLARIFAI_API_BASE=${CLARIFAI_API_BASE} # Finally run the clarifai entrypoint to start the runner loop and local dev server. # Note(zeiler): we may want to make this a clarifai CLI call. 
-CMD ["--model_path", "/app/model_dir/main"] +ENTRYPOINT ["python", "-m", "clarifai.runners.server"] +CMD ["--model_path", "/home/nonroot/main"] +############################# diff --git a/clarifai/runners/models/model_upload.py b/clarifai/runners/models/model_upload.py index 1daa89e7..88108c48 100644 --- a/clarifai/runners/models/model_upload.py +++ b/clarifai/runners/models/model_upload.py @@ -13,9 +13,9 @@ from rich.markup import escape from clarifai.client import BaseClient -from clarifai.runners.utils.const import (AVAILABLE_PYTHON_IMAGES, AVAILABLE_TORCH_IMAGES, - CONCEPTS_REQUIRED_MODEL_TYPE, DEFAULT_PYTHON_VERSION, - PYTHON_BASE_IMAGE, TORCH_BASE_IMAGE) +from clarifai.runners.utils.const import ( + AVAILABLE_PYTHON_IMAGES, AVAILABLE_TORCH_IMAGES, CONCEPTS_REQUIRED_MODEL_TYPE, + DEFAULT_PYTHON_VERSION, PYTHON_BUILDER_IMAGE, PYTHON_RUNTIME_IMAGE, TORCH_BASE_IMAGE) from clarifai.runners.utils.loader import HuggingFaceLoader from clarifai.urls.helper import ClarifaiUrlHelper from clarifai.utils.logging import logger @@ -247,6 +247,10 @@ def _parse_requirements(self): if match: dependency = match.group('dependency') version = match.group('version') + if dependency == "torch" and line.find( 'whl/cpu') > 0: # Ignore torch-cpu whl files, use base image. + continue + deendencies_version[dependency] = version if version else None return deendencies_version @@ -279,28 +283,37 @@ def create_dockerfile(self): ) python_version = DEFAULT_PYTHON_VERSION - base_image = PYTHON_BASE_IMAGE.format(python_version=python_version) + # This is always the final image used for runtime. 
+ runtime_image = PYTHON_RUNTIME_IMAGE.format(python_version=python_version) + builder_image = PYTHON_BUILDER_IMAGE.format(python_version=python_version) + downloader_image = PYTHON_BUILDER_IMAGE.format(python_version=python_version) # Parse the requirements.txt file to determine the base image dependencies = self._parse_requirements() if 'torch' in dependencies and dependencies['torch']: torch_version = dependencies['torch'] - for image in AVAILABLE_TORCH_IMAGES: + # Sort in reverse so that newer cuda versions come first and are preferred. + for image in sorted(AVAILABLE_TORCH_IMAGES, reverse=True): if torch_version in image and f'py{python_version}' in image: cuda_version = image.split('-')[-1].replace('cuda', '') - base_image = TORCH_BASE_IMAGE.format( + builder_image = TORCH_BASE_IMAGE.format( torch_version=torch_version, python_version=python_version, cuda_version=cuda_version, ) + # download_image = base_image logger.info(f"Using Torch version {torch_version} base image to build the Docker image") break - + # else: # if not torch then use the download image for the base image too + # # base_image = download_image + # requirements_image = base_image # Replace placeholders with actual values dockerfile_content = dockerfile_template.safe_substitute( name='main', - BASE_IMAGE=base_image, + BUILDER_IMAGE=builder_image, # for pip requirements + RUNTIME_IMAGE=runtime_image, # for runtime + DOWNLOADER_IMAGE=downloader_image, # for downloading checkpoints ) # Write Dockerfile @@ -309,7 +322,10 @@ def create_dockerfile(self): @property def checkpoint_path(self): - return os.path.join(self.folder, self.checkpoint_suffix) + return self._checkpoint_path(self.folder) + + def _checkpoint_path(self, folder): + return os.path.join(folder, self.checkpoint_suffix) @property def checkpoint_suffix(self): @@ -319,7 +335,14 @@ def checkpoint_suffix(self): def tar_file(self): return f"{self.folder}.tar.gz" - def download_checkpoints(self): + def download_checkpoints(self, 
checkpoint_path_override: str = None): + """ + Downloads the checkpoints specified in the config file. + + :param checkpoint_path_override: The path to download the checkpoints to. If not provided, the + default path is used based on the folder ModelUploader was initialized with. The + checkpoint_suffix will be appended to the path. + """ if not self.config.get("checkpoints"): logger.info("No checkpoints specified in the config file") return True @@ -329,7 +352,9 @@ def download_checkpoints(self): success = True if loader_type == "huggingface": loader = HuggingFaceLoader(repo_id=repo_id, token=hf_token) - success = loader.download_checkpoints(self.checkpoint_path) + path = self._checkpoint_path( + checkpoint_path_override) if checkpoint_path_override else self.checkpoint_path + success = loader.download_checkpoints(path) if loader_type: if not success: @@ -462,7 +487,7 @@ def filter_func(tarinfo): f"request_id: {response.status.req_id}", end='\r', flush=True) - print() + logger.info("") if response.status.code != status_code_pb2.MODEL_BUILDING: logger.error(f"Failed to upload model version: {response}") return @@ -552,11 +577,11 @@ def monitor_model_build(self): for log_entry in logs.log_entries: if log_entry.url not in seen_logs: seen_logs.add(log_entry.url) - print(f"Model Building Logs...: {escape(log_entry.message.strip())}") + logger.info(f"{escape(log_entry.message.strip())}") time.sleep(1) elif status_code == status_code_pb2.MODEL_TRAINED: logger.info(f"\nModel build complete! 
(elapsed {time.time() - st:.1f}s)") - logger.info(f"Check out the model at {self.model_url}") + logger.info(f"Check out the model at {self.model_url} version: {self.model_version_id}") return True else: logger.info( diff --git a/clarifai/runners/utils/const.py b/clarifai/runners/utils/const.py index fb21de40..de0b368b 100644 --- a/clarifai/runners/utils/const.py +++ b/clarifai/runners/utils/const.py @@ -2,8 +2,9 @@ registry = os.environ.get('CLARIFAI_BASE_IMAGE_REGISTRY', 'public.ecr.aws/clarifai-models') -PYTHON_BASE_IMAGE = registry + '/python-base:{python_version}' -TORCH_BASE_IMAGE = registry + '/torch:{torch_version}-py{python_version}-cuda{cuda_version}' +PYTHON_BUILDER_IMAGE = registry + '/python-base:builder-{python_version}' +PYTHON_RUNTIME_IMAGE = registry + '/python-base:runtime-{python_version}' +TORCH_BASE_IMAGE = registry + '/torch:builder-{torch_version}-py{python_version}-cuda{cuda_version}' # List of available python base images AVAILABLE_PYTHON_IMAGES = ['3.11', '3.12'] @@ -11,6 +12,7 @@ DEFAULT_PYTHON_VERSION = 3.12 # List of available torch images +# Keep sorted by most recent cuda version. 
AVAILABLE_TORCH_IMAGES = [ '2.4.0-py3.11-cuda124', '2.4.1-py3.11-cuda124', @@ -18,13 +20,8 @@ '2.4.0-py3.12-cuda124', '2.4.1-py3.12-cuda124', '2.5.1-py3.12-cuda124', - # '2.2.2-py3.13-cuda121', - # '2.3.1-py3.13-cuda121', - # '2.4.0-py3.13-cuda121', # '2.4.0-py3.13-cuda124', - # '2.4.1-py3.13-cuda121', # '2.4.1-py3.13-cuda124', - # '2.5.1-py3.13-cuda121', # '2.5.1-py3.13-cuda124', ] CONCEPTS_REQUIRED_MODEL_TYPE = [ diff --git a/clarifai/runners/utils/loader.py b/clarifai/runners/utils/loader.py index c62f73fb..ab595a4d 100644 --- a/clarifai/runners/utils/loader.py +++ b/clarifai/runners/utils/loader.py @@ -3,7 +3,6 @@ import json import os import shutil -import subprocess from clarifai.utils.logging import logger @@ -17,7 +16,11 @@ def __init__(self, repo_id=None, token=None): self.token = token if token: if self.validate_hftoken(token): - subprocess.run(f'huggingface-cli login --token={os.environ["HF_TOKEN"]}', shell=True) + try: + from huggingface_hub import login + except ImportError: + raise ImportError(self.HF_DOWNLOAD_TEXT) + login(token=token) logger.info("Hugging Face token validated") else: logger.info("Continuing without Hugging Face token") diff --git a/clarifai/utils/cli.py b/clarifai/utils/cli.py index 6faa5bf1..286b6881 100644 --- a/clarifai/utils/cli.py +++ b/clarifai/utils/cli.py @@ -46,7 +46,7 @@ def load_command_modules(): package_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'cli') for _, module_name, _ in pkgutil.iter_modules([package_dir]): - if module_name != 'base': # Skip the base.py file itself + if module_name not in ['base', '__main__']: # Skip the base.py and __main__ file itself importlib.import_module(f'clarifai.cli.{module_name}') diff --git a/clarifai/utils/logging.py b/clarifai/utils/logging.py index 43c0ba15..031d10b5 100644 --- a/clarifai/utils/logging.py +++ b/clarifai/utils/logging.py @@ -143,8 +143,8 @@ def _configure_logger(name: str, logger_level: Union[int, str] = logging.NOTSET) else: # Add the new rich 
handler and formatter handler = RichHandler( - rich_tracebacks=True, log_time_format="%Y-%m-%d %H:%M:%S", console=Console(width=255)) - formatter = logging.Formatter('%(name)s: %(message)s') + rich_tracebacks=True, log_time_format="%Y-%m-%d %H:%M:%S.%f", console=Console(width=255)) + formatter = logging.Formatter('%(message)s') handler.setFormatter(formatter) logger.addHandler(handler)