Skip to content

Commit

Permalink
[EAGLE-5448]: upload improvements (#498)
Browse files Browse the repository at this point in the history
* don't use shell for hf token validation

* update dep in dockerfile

* change version
  • Loading branch information
zeiler authored Jan 24, 2025
1 parent 21072ba commit 0830812
Show file tree
Hide file tree
Showing 11 changed files with 153 additions and 57 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
## [[11.0.7]](https://github.com/Clarifai/clarifai-python/releases/tag/11.0.7) - [PyPI](https://pypi.org/project/clarifai/11.0.7/) - 2025-01-24

### Changed

- Updated model upload experience [(#498)](https://github.com/Clarifai/clarifai-python/pull/498)

## [[11.0.6]](https://github.com/Clarifai/clarifai-python/releases/tag/11.0.6) - [PyPI](https://pypi.org/project/clarifai/11.0.6/) - 2025-01-24

### Changed
Expand Down
2 changes: 1 addition & 1 deletion clarifai/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "11.0.6"
__version__ = "11.0.7"
4 changes: 4 additions & 0 deletions clarifai/cli/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Entry point for `python -m clarifai.cli`: delegates straight to the CLI's main().
# Kept minimal so the Dockerfile's exec-form RUN/ENTRYPOINT can invoke the CLI
# without a shell.
from clarifai.cli.base import main

if __name__ == "__main__":
    main()
3 changes: 2 additions & 1 deletion clarifai/cli/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,5 +108,6 @@ def login(ctx, config, env, user_id):
# Import the CLI commands to register them
load_command_modules()

if __name__ == '__main__':

def main():
    """Console-script entry point: invokes the top-level Clarifai CLI group."""
    cli()
25 changes: 23 additions & 2 deletions clarifai/cli/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@

@cli.group(['model'])
def model():
"""Manage models: upload, test locally, run_locally, predict"""
pass
"""Manage models: upload, test locally, run locally, predict, and more"""


@model.command()
Expand Down Expand Up @@ -34,6 +33,28 @@ def upload(model_path, download_checkpoints, skip_dockerfile):
model_upload.main(model_path, download_checkpoints, skip_dockerfile)


@model.command()
@click.option(
    '--model_path',
    type=click.Path(exists=True),
    required=True,
    help='Path to the model directory.')
@click.option(
    '--out_path',
    type=click.Path(exists=False),
    required=False,
    default=None,
    help=
    'Optional path to write the checkpoints to. This will place them in {out_path}/ If not provided it will default to {model_path}/1/checkpoints where the config.yaml is read.'
)
def download_checkpoints(model_path, out_path):
  """Download checkpoints from external source to local model_path.

  Reads the checkpoints: block of {model_path}/config.yaml and downloads the
  referenced checkpoints to out_path (or the default location when out_path is None).
  """

  # Deferred import so the CLI starts fast and heavy runner dependencies are only
  # loaded when this command actually runs.
  from clarifai.runners.models.model_upload import ModelUploader
  uploader = ModelUploader(model_path, download_validation_only=True)
  uploader.download_checkpoints(out_path)


@model.command()
@click.option(
'--model_path',
Expand Down
93 changes: 66 additions & 27 deletions clarifai/runners/dockerfile_template/Dockerfile.template
Original file line number Diff line number Diff line change
@@ -1,43 +1,82 @@
FROM --platform=$TARGETPLATFORM ${BASE_IMAGE} as build
# syntax=docker/dockerfile:1
#############################
# User specific requirements installed in the pip_packages
#############################
FROM --platform=$TARGETPLATFORM ${BUILDER_IMAGE} as pip_packages

ENV DEBIAN_FRONTEND=noninteractive
COPY --link requirements.txt /home/nonroot/requirements.txt

# Update clarifai package so we always have latest protocol to the API. Everything should land in /venv
RUN pip install --no-cache-dir -r /home/nonroot/requirements.txt && \
(pip install --upgrade --upgrade-strategy only-if-needed --no-deps --no-cache-dir clarifai clarifai-grpc clarifai-protocol || true)
#############################

#############################
# User specific requirements
# Downloader dependencies image
#############################
COPY requirements.txt .
FROM --platform=$TARGETPLATFORM ${DOWNLOADER_IMAGE} as downloader

# Install requirements and clarifai package and cleanup before leaving this line.
# Note(zeiler): this could be in a future template as {{model_python_deps}}
RUN pip install --no-cache-dir -r requirements.txt && \
pip install --no-cache-dir clarifai
# make sure we have the latest clarifai package.
RUN (pip install --upgrade --upgrade-strategy only-if-needed --no-cache-dir clarifai clarifai-grpc clarifai-protocol || true)
#####

# These will be set by the templating system.
ENV CLARIFAI_PAT=${CLARIFAI_PAT}
ENV CLARIFAI_USER_ID=${CLARIFAI_USER_ID}
ENV CLARIFAI_RUNNER_ID=${CLARIFAI_RUNNER_ID}
ENV CLARIFAI_NODEPOOL_ID=${CLARIFAI_NODEPOOL_ID}
ENV CLARIFAI_COMPUTE_CLUSTER_ID=${CLARIFAI_COMPUTE_CLUSTER_ID}
ENV CLARIFAI_API_BASE=${CLARIFAI_API_BASE}

#############################
# Final runtime image
#############################
FROM --platform=$TARGETPLATFORM ${RUNTIME_IMAGE} as final

# Set the NUMBA cache dir to /tmp
ENV NUMBA_CACHE_DIR=/tmp/numba_cache
# Set the TORCHINDUCTOR cache dir to /tmp
ENV TORCHINDUCTOR_CACHE_DIR=/tmp/torchinductor_cache
ENV HOME=/tmp
# The CLARIFAI* will be set by the templating system.
ENV NUMBA_CACHE_DIR=/tmp/numba_cache \
TORCHINDUCTOR_CACHE_DIR=/tmp/torchinductor_cache \
HOME=/tmp \
DEBIAN_FRONTEND=noninteractive

# Set the working directory to /app
WORKDIR /app
#####
# Copy the python requirements needed to download checkpoints
#####
COPY --link=true --from=downloader /venv /venv
#####

# Copy the current folder into /app/model_dir that the SDK will expect.
# Note(zeiler): would be nice to exclude checkpoints in case they were pre-downloaded.
COPY . /app/model_dir/${name}
#####
# Copy the files needed to download
#####
# This creates the directory that the HF downloader will populate, with nonroot:nonroot permissions.
COPY --chown=nonroot:nonroot downloader/unused.yaml /home/nonroot/main/1/checkpoints/.cache/unused.yaml

# Add the model directory to the python path.
ENV PYTHONPATH=${PYTHONPATH}:/app/model_dir/${name}
#####
# Download checkpoints
COPY --link=true config.yaml /home/nonroot/main/
RUN ["python", "-m", "clarifai.cli", "model", "download-checkpoints", "--model_path", "/home/nonroot/main", "--out_path", "/home/nonroot/main"]
#####

ENTRYPOINT ["python", "-m", "clarifai.runners.server"]

#####
# Copy the python packages from the previous stage.
COPY --link=true --from=pip_packages /venv /venv
#####

# Copy in the actual files like config.yaml, requirements.txt, and most importantly 1/model.py
# for the actual model.
# If checkpoints aren't downloaded since a checkpoints: block is not provided, then they will
# be in the build context and copied here as well.
COPY --link=true 1/model.py /home/nonroot/main/1/model.py
# At this point we only need these for validation in the SDK.
COPY --link=true requirements.txt config.yaml /home/nonroot/main/

# Add the model directory to the python path.
ENV PYTHONPATH=${PYTHONPATH}:/home/nonroot/main \
CLARIFAI_PAT=${CLARIFAI_PAT} \
CLARIFAI_USER_ID=${CLARIFAI_USER_ID} \
CLARIFAI_RUNNER_ID=${CLARIFAI_RUNNER_ID} \
CLARIFAI_NODEPOOL_ID=${CLARIFAI_NODEPOOL_ID} \
CLARIFAI_COMPUTE_CLUSTER_ID=${CLARIFAI_COMPUTE_CLUSTER_ID} \
CLARIFAI_API_BASE=${CLARIFAI_API_BASE}

# Finally run the clarifai entrypoint to start the runner loop and local dev server.
# Note(zeiler): we may want to make this a clarifai CLI call.
CMD ["--model_path", "/app/model_dir/main"]
ENTRYPOINT ["python", "-m", "clarifai.runners.server"]
CMD ["--model_path", "/home/nonroot/main"]
#############################
53 changes: 39 additions & 14 deletions clarifai/runners/models/model_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
from rich.markup import escape

from clarifai.client import BaseClient
from clarifai.runners.utils.const import (AVAILABLE_PYTHON_IMAGES, AVAILABLE_TORCH_IMAGES,
CONCEPTS_REQUIRED_MODEL_TYPE, DEFAULT_PYTHON_VERSION,
PYTHON_BASE_IMAGE, TORCH_BASE_IMAGE)
from clarifai.runners.utils.const import (
AVAILABLE_PYTHON_IMAGES, AVAILABLE_TORCH_IMAGES, CONCEPTS_REQUIRED_MODEL_TYPE,
DEFAULT_PYTHON_VERSION, PYTHON_BUILDER_IMAGE, PYTHON_RUNTIME_IMAGE, TORCH_BASE_IMAGE)
from clarifai.runners.utils.loader import HuggingFaceLoader
from clarifai.urls.helper import ClarifaiUrlHelper
from clarifai.utils.logging import logger
Expand Down Expand Up @@ -247,6 +247,10 @@ def _parse_requirements(self):
if match:
dependency = match.group('dependency')
version = match.group('version')
if dependency == "torch" and line.find(
'whl/cpu') > 0:  # Ignore torch-cpu whl files, use base image.
continue

deendencies_version[dependency] = version if version else None
return deendencies_version

Expand Down Expand Up @@ -279,28 +283,37 @@ def create_dockerfile(self):
)
python_version = DEFAULT_PYTHON_VERSION

base_image = PYTHON_BASE_IMAGE.format(python_version=python_version)
# This is always the final image used for runtime.
runtime_image = PYTHON_RUNTIME_IMAGE.format(python_version=python_version)
builder_image = PYTHON_BUILDER_IMAGE.format(python_version=python_version)
downloader_image = PYTHON_BUILDER_IMAGE.format(python_version=python_version)

# Parse the requirements.txt file to determine the base image
dependencies = self._parse_requirements()
if 'torch' in dependencies and dependencies['torch']:
torch_version = dependencies['torch']

for image in AVAILABLE_TORCH_IMAGES:
# Sort in reverse so that newer cuda versions come first and are preferred.
for image in sorted(AVAILABLE_TORCH_IMAGES, reverse=True):
if torch_version in image and f'py{python_version}' in image:
cuda_version = image.split('-')[-1].replace('cuda', '')
base_image = TORCH_BASE_IMAGE.format(
builder_image = TORCH_BASE_IMAGE.format(
torch_version=torch_version,
python_version=python_version,
cuda_version=cuda_version,
)
# download_image = base_image
logger.info(f"Using Torch version {torch_version} base image to build the Docker image")
break

# else: # if not torch then use the download image for the base image too
# # base_image = download_image
# requirements_image = base_image
# Replace placeholders with actual values
dockerfile_content = dockerfile_template.safe_substitute(
name='main',
BASE_IMAGE=base_image,
BUILDER_IMAGE=builder_image, # for pip requirements
RUNTIME_IMAGE=runtime_image, # for runtime
DOWNLOADER_IMAGE=downloader_image, # for downloading checkpoints
)

# Write Dockerfile
Expand All @@ -309,7 +322,10 @@ def create_dockerfile(self):

@property
def checkpoint_path(self):
return os.path.join(self.folder, self.checkpoint_suffix)
return self._checkpoint_path(self.folder)

def _checkpoint_path(self, folder):
    """Return the checkpoint directory under `folder` (folder joined with checkpoint_suffix)."""
    return os.path.join(folder, self.checkpoint_suffix)

@property
def checkpoint_suffix(self):
Expand All @@ -319,7 +335,14 @@ def checkpoint_suffix(self):
def tar_file(self):
return f"{self.folder}.tar.gz"

def download_checkpoints(self):
def download_checkpoints(self, checkpoint_path_override: str = None):
"""
Downloads the checkpoints specified in the config file.
:param checkpoint_path_override: The path to download the checkpoints to. If not provided, the
default path is used based on the folder ModelUploader was initialized with. The
checkpoint_suffix will be appended to the path.
"""
if not self.config.get("checkpoints"):
logger.info("No checkpoints specified in the config file")
return True
Expand All @@ -329,7 +352,9 @@ def download_checkpoints(self):
success = True
if loader_type == "huggingface":
loader = HuggingFaceLoader(repo_id=repo_id, token=hf_token)
success = loader.download_checkpoints(self.checkpoint_path)
path = self._checkpoint_path(
checkpoint_path_override) if checkpoint_path_override else self.checkpoint_path
success = loader.download_checkpoints(path)

if loader_type:
if not success:
Expand Down Expand Up @@ -462,7 +487,7 @@ def filter_func(tarinfo):
f"request_id: {response.status.req_id}",
end='\r',
flush=True)
print()
logger.info("")
if response.status.code != status_code_pb2.MODEL_BUILDING:
logger.error(f"Failed to upload model version: {response}")
return
Expand Down Expand Up @@ -552,11 +577,11 @@ def monitor_model_build(self):
for log_entry in logs.log_entries:
if log_entry.url not in seen_logs:
seen_logs.add(log_entry.url)
print(f"Model Building Logs...: {escape(log_entry.message.strip())}")
logger.info(f"{escape(log_entry.message.strip())}")
time.sleep(1)
elif status_code == status_code_pb2.MODEL_TRAINED:
logger.info(f"\nModel build complete! (elapsed {time.time() - st:.1f}s)")
logger.info(f"Check out the model at {self.model_url}")
logger.info(f"Check out the model at {self.model_url} version: {self.model_version_id}")
return True
else:
logger.info(
Expand Down
11 changes: 4 additions & 7 deletions clarifai/runners/utils/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,26 @@

registry = os.environ.get('CLARIFAI_BASE_IMAGE_REGISTRY', 'public.ecr.aws/clarifai-models')

PYTHON_BASE_IMAGE = registry + '/python-base:{python_version}'
TORCH_BASE_IMAGE = registry + '/torch:{torch_version}-py{python_version}-cuda{cuda_version}'
PYTHON_BUILDER_IMAGE = registry + '/python-base:builder-{python_version}'
PYTHON_RUNTIME_IMAGE = registry + '/python-base:runtime-{python_version}'
TORCH_BASE_IMAGE = registry + '/torch:builder-{torch_version}-py{python_version}-cuda{cuda_version}'

# List of available python base images
AVAILABLE_PYTHON_IMAGES = ['3.11', '3.12']

DEFAULT_PYTHON_VERSION = 3.12

# List of available torch images
# Keep sorted by most recent cuda version.
AVAILABLE_TORCH_IMAGES = [
'2.4.0-py3.11-cuda124',
'2.4.1-py3.11-cuda124',
'2.5.1-py3.11-cuda124',
'2.4.0-py3.12-cuda124',
'2.4.1-py3.12-cuda124',
'2.5.1-py3.12-cuda124',
# '2.2.2-py3.13-cuda121',
# '2.3.1-py3.13-cuda121',
# '2.4.0-py3.13-cuda121',
# '2.4.0-py3.13-cuda124',
# '2.4.1-py3.13-cuda121',
# '2.4.1-py3.13-cuda124',
# '2.5.1-py3.13-cuda121',
# '2.5.1-py3.13-cuda124',
]
CONCEPTS_REQUIRED_MODEL_TYPE = [
Expand Down
7 changes: 5 additions & 2 deletions clarifai/runners/utils/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import json
import os
import shutil
import subprocess

from clarifai.utils.logging import logger

Expand All @@ -17,7 +16,11 @@ def __init__(self, repo_id=None, token=None):
self.token = token
if token:
if self.validate_hftoken(token):
subprocess.run(f'huggingface-cli login --token={os.environ["HF_TOKEN"]}', shell=True)
try:
from huggingface_hub import login
except ImportError:
raise ImportError(self.HF_DOWNLOAD_TEXT)
login(token=token)
logger.info("Hugging Face token validated")
else:
logger.info("Continuing without Hugging Face token")
Expand Down
2 changes: 1 addition & 1 deletion clarifai/utils/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def load_command_modules():
package_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'cli')

for _, module_name, _ in pkgutil.iter_modules([package_dir]):
if module_name != 'base': # Skip the base.py file itself
if module_name not in ['base', '__main__']: # Skip the base.py and __main__ file itself
importlib.import_module(f'clarifai.cli.{module_name}')


Expand Down
4 changes: 2 additions & 2 deletions clarifai/utils/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,8 @@ def _configure_logger(name: str, logger_level: Union[int, str] = logging.NOTSET)
else:
# Add the new rich handler and formatter
handler = RichHandler(
rich_tracebacks=True, log_time_format="%Y-%m-%d %H:%M:%S", console=Console(width=255))
formatter = logging.Formatter('%(name)s: %(message)s')
rich_tracebacks=True, log_time_format="%Y-%m-%d %H:%M:%S.%f", console=Console(width=255))
formatter = logging.Formatter('%(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)

Expand Down

0 comments on commit 0830812

Please sign in to comment.