diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2c862e8..afff26a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -99,22 +99,7 @@ jobs: run: make setup - name: Test local handler execution - run: | - echo "Testing handler with all test_*.json files..." - passed=0 - total=0 - for test_file in test_*.json; do - total=$((total + 1)) - echo "Testing with $test_file..." - if timeout 30s env PYTHONPATH=src RUNPOD_TEST_INPUT="$(cat "$test_file")" uv run python src/handler.py >/dev/null 2>&1; then - echo "✓ $test_file: PASSED" - passed=$((passed + 1)) - else - echo "✗ $test_file: FAILED" - exit 1 - fi - done - echo "All $passed/$total handler tests passed!" + run: make test-handler release: runs-on: ubuntu-latest diff --git a/CLAUDE.md b/CLAUDE.md index c4be927..1de083f 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -68,12 +68,8 @@ make build-cpu # Build CPU-only Docker image ### Local Testing ```bash -# Test handler locally with test_input.json -PYTHONPATH=src RUNPOD_TEST_INPUT="$(cat test_input.json)" uv run python src/handler.py - -# Test with other test files -PYTHONPATH=src RUNPOD_TEST_INPUT="$(cat test_class_input.json)" uv run python src/handler.py -PYTHONPATH=src RUNPOD_TEST_INPUT="$(cat test_hf_input.json)" uv run python src/handler.py +# Test handler locally with test*.json +make test-handler ``` ### Submodule Management @@ -122,6 +118,14 @@ The handler automatically detects and utilizes `/runpod-volume` for persistent w - **Optimized Resource Usage**: Shared caches across multiple endpoints while maintaining isolation - **ML Model Efficiency**: Large HF models cached on volume prevent "No space left on device" errors +### HuggingFace Model Acceleration +The system automatically leverages HuggingFace's native acceleration features: +- **hf_transfer**: Accelerated downloads for large model files when available +- **hf_xet**: Automatic chunk-level deduplication and incremental downloads (huggingface_hub>=0.32.0) +- **Native Integration**: Uses HF Hub's `snapshot_download()` for optimal caching and acceleration +- **Transparent Operation**: No code changes needed - acceleration is automatic when repositories support it +- **Token Support**: Configured via `HF_TOKEN` environment variable for private repositories + ## Configuration ### Environment Variables @@ -160,11 +164,6 @@ make test-integration # Run integration tests only make test-coverage # Run tests with coverage report make test-fast # Run tests with fail-fast mode make test-handler # Test handler locally with all test_*.json files (same as CI) - -# Test handler locally with specific test files -PYTHONPATH=src RUNPOD_TEST_INPUT="$(cat test_input.json)" uv run python src/handler.py -PYTHONPATH=src RUNPOD_TEST_INPUT="$(cat test_class_input.json)" uv run python src/handler.py -PYTHONPATH=src RUNPOD_TEST_INPUT="$(cat test_hf_input.json)" uv run python src/handler.py ``` ### Testing Framework @@ -261,3 +260,8 @@ Configure these in GitHub repository settings: ### Docker Guidelines - Docker container should never refer to src/ + +- Always run `make quality-check` before pronouncing you have finished your work +- Always use `git mv` when moving existing files around + +- Run the command `make test-handler` to run checks on test files. Do not try to run it one by one like `Bash(env RUNPOD_TEST_INPUT="$(cat test_input.json)" PYTHONPATH=. uv run python handler.py)` diff --git a/Dockerfile b/Dockerfile index 0bb269d..6323086 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,7 +10,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ && chmod +x /usr/local/bin/uv # Copy app code and install dependencies -COPY README.md src/* pyproject.toml uv.lock test_*.json test-handler.sh ./ +COPY README.md src/* pyproject.toml uv.lock ./ RUN uv sync @@ -19,11 +19,12 @@ FROM pytorch/pytorch:2.2.0-cuda12.1-cudnn8-runtime WORKDIR /app +# Install nala for system package acceleration in runtime stage +RUN apt-get update && apt-get install -y --no-install-recommends nala \ + && rm -rf /var/lib/apt/lists/* + # Copy app and uv binary from builder COPY --from=builder /app /app COPY --from=builder /usr/local/bin/uv /usr/local/bin/uv -# Clean up any unnecessary system tools -RUN rm -rf /var/lib/apt/lists/* - CMD ["uv", "run", "handler.py"] \ No newline at end of file diff --git a/Dockerfile-cpu b/Dockerfile-cpu index e0911ff..1ffe7d3 100644 --- a/Dockerfile-cpu +++ b/Dockerfile-cpu @@ -11,7 +11,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ && chmod +x /usr/local/bin/uv # Copy app files and install deps -COPY README.md src/* pyproject.toml uv.lock test_*.json test-handler.sh ./ +COPY README.md src/* pyproject.toml uv.lock ./ RUN uv sync # Stage 2: Runtime stage @@ -21,7 +21,7 @@ WORKDIR /app # Install runtime dependencies RUN apt-get update && apt-get install -y --no-install-recommends \ - curl ca-certificates \ + curl ca-certificates nala \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* diff --git a/Makefile b/Makefile index 288b40d..c8afdf5 100644 --- a/Makefile +++ b/Makefile @@ -68,7 +68,7 @@ test-fast: # Run tests with fast-fail mode uv run pytest tests/ -v -x --tb=short test-handler: # Test handler locally with all test_*.json files - ./test-handler.sh + cd src && ./test-handler.sh # Smoke Tests (local on Mac OS) @@ -97,7 +97,7 @@ format-check: # Check code formatting # Type checking typecheck: # Check types with mypy - uv run mypy . + uv run mypy src/ # Quality gates (used in CI) -quality-check: format-check lint typecheck test-coverage +quality-check: format-check lint typecheck test-coverage test-handler diff --git a/pyproject.toml b/pyproject.toml index 2288685..d503d21 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,10 @@ requires-python = ">=3.9,<3.13" dependencies = [ "cloudpickle>=3.1.1", "pydantic>=2.11.4", + "requests>=2.25.0", "runpod", + "hf_transfer>=0.1.0", + "huggingface_hub>=0.32.0", ] [dependency-groups] @@ -18,6 +21,7 @@ dev = [ "pytest-asyncio>=0.24.0", "ruff>=0.8.0", "mypy>=1.11.0", + "types-requests>=2.25.0", ] [tool.pytest.ini_options] @@ -48,40 +52,37 @@ filterwarnings = [ "ignore::pytest.PytestUnknownMarkWarning" ] -[tool.ruff] -# Exclude tetra-rp directory since it's a separate repository -exclude = [ - "tetra-rp/", -] - [tool.mypy] -# Basic configuration python_version = "3.9" -warn_return_any = true -warn_unused_configs = true -disallow_untyped_defs = false # Start lenient, can be stricter later -disallow_incomplete_defs = false -check_untyped_defs = true - -# Import discovery -mypy_path = "src" +mypy_path = ["src"] +explicit_package_bases = true namespace_packages = true - -# Error output +check_untyped_defs = true +disallow_any_generics = true +disallow_untyped_defs = false +warn_redundant_casts = true +warn_unused_ignores = true +warn_return_any = true +strict_optional = true show_error_codes = true show_column_numbers = true pretty = true - -# Exclude directories exclude = [ "tetra-rp/", - "tests/", # Start by excluding tests, can add later ] -# Per-module options [[tool.mypy.overrides]] module = [ - "runpod.*", - "cloudpickle.*", + "cloudpickle", + "runpod", + "transformers", + "hf_transfer", + "huggingface_hub", ] ignore_missing_imports = true + +[tool.ruff] +# Exclude tetra-rp directory since it's a separate repository +exclude = [ + "tetra-rp/", +] diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..8ae010c --- /dev/null +++ b/src/__init__.py @@ -0,0 +1 @@ +"""Worker Tetra package.""" diff --git a/src/class_executor.py b/src/class_executor.py index 46fa81a..4a3b656 100644 --- a/src/class_executor.py +++ b/src/class_executor.py @@ -18,7 +18,7 @@ def __init__(self, workspace_manager): super().__init__(workspace_manager) # Instance registry for persistent class instances self.class_instances: Dict[str, Any] = {} - self.instance_metadata: Dict[str, Dict] = {} + self.instance_metadata: Dict[str, Dict[str, Any]] = {} def execute(self, request: FunctionRequest) -> FunctionResponse: """Execute class method - required by BaseExecutor interface.""" diff --git a/src/constants.py b/src/constants.py index 53fd4f7..ee00120 100644 --- a/src/constants.py +++ b/src/constants.py @@ -20,3 +20,75 @@ RUNTIMES_DIR_NAME = "runtimes" """Name of the runtimes directory containing per-endpoint workspaces.""" + +# Download Acceleration Settings +MIN_SIZE_FOR_ACCELERATION_MB = 10 +"""Minimum file size in MB to trigger download acceleration.""" + +DOWNLOAD_TIMEOUT_SECONDS = 600 +"""Default timeout for download operations in seconds.""" + +# New download accelerator settings +HF_TRANSFER_ENABLED = True +"""Enable hf_transfer for fresh HuggingFace downloads.""" + + +# Size Conversion Constants +BYTES_PER_MB = 1024 * 1024 +"""Number of bytes in a megabyte.""" + +MB_SIZE_THRESHOLD = 1 * BYTES_PER_MB +"""Minimum file size threshold for considering acceleration (1MB).""" + +# HuggingFace Model Patterns +LARGE_HF_MODEL_PATTERNS = [ + "albert-large", + "albert-xlarge", + "bart-large", + "bert-large", + "bert-base", + "codegen", + "diffusion", + "distilbert-base", + "falcon", + "gpt", + "hubert", + "llama", + "mistral", + "mpt", + "pegasus", + "roberta-large", + "roberta-base", + "santacoder", + "stable-diffusion", + "t5", + "vae", + "wav2vec2", + "whisper", + "xlm-roberta", + "xlnet", +] +"""List of HuggingFace model patterns that benefit from download acceleration.""" + +# System Package Acceleration with Nala +LARGE_SYSTEM_PACKAGES = [ + "build-essential", + "cmake", + "cuda-toolkit", + "curl", + "g++", + "gcc", + "git", + "libssl-dev", + "nvidia-cuda-dev", + "python3-dev", + "wget", +] +"""List of system packages that benefit from nala's accelerated installation.""" + +NALA_CHECK_CMD = ["which", "nala"] +"""Command to check if nala is available.""" + +# Logging Configuration +LOG_FORMAT = "%(asctime)s - %(levelname)s - %(name)s - %(message)s" +"""Standard log format string used across the application.""" diff --git a/src/dependency_installer.py b/src/dependency_installer.py index 8f15c81..1b9b0b9 100644 --- a/src/dependency_installer.py +++ b/src/dependency_installer.py @@ -2,9 +2,12 @@ import subprocess import importlib import logging +import asyncio from typing import List, Dict from remote_execution import FunctionResponse +from download_accelerator import DownloadAccelerator +from constants import LARGE_SYSTEM_PACKAGES, NALA_CHECK_CMD class DependencyInstaller: @@ -13,10 +16,21 @@ class DependencyInstaller: def __init__(self, workspace_manager): self.workspace_manager = workspace_manager self.logger = logging.getLogger(__name__) + self.download_accelerator = DownloadAccelerator(workspace_manager) + self._nala_available = None # Cache nala availability check - def install_system_dependencies(self, packages: List[str]) -> FunctionResponse: + def install_system_dependencies( + self, packages: List[str], accelerate_downloads: bool = True + ) -> FunctionResponse: """ - Install system packages using apt-get. + Install system packages using nala (accelerated) or apt-get (standard). + + Args: + packages: List of system package names + accelerate_downloads: Whether to use nala for accelerated downloads + + Returns: + FunctionResponse: Object indicating success or failure with details """ if not packages: return FunctionResponse( @@ -25,59 +39,26 @@ def install_system_dependencies(self, packages: List[str]) -> FunctionResponse: self.logger.info(f"Installing system dependencies: {packages}") - try: - # Update package list first - update_process = subprocess.Popen( - ["apt-get", "update"], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - update_stdout, update_stderr = update_process.communicate() + # Check if we should use accelerated installation with nala + large_packages = self._identify_large_system_packages(packages) - if update_process.returncode != 0: - return FunctionResponse( - success=False, - error="Error updating package list", - stdout=update_stderr.decode(), - ) - - # Install the packages - process = subprocess.Popen( - ["apt-get", "install", "-y", "--no-install-recommends"] + packages, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - env={ - **os.environ, - "DEBIAN_FRONTEND": "noninteractive", - }, - ) - - stdout, stderr = process.communicate() - - if process.returncode != 0: - return FunctionResponse( - success=False, - error="Error installing system packages", - stdout=stderr.decode(), - ) - else: - self.logger.info(f"Successfully installed system packages: {packages}") - return FunctionResponse( - success=True, - stdout=stdout.decode(), - ) - except Exception as e: - return FunctionResponse( - success=False, - error=f"Exception during system package installation: {e}", + if accelerate_downloads and large_packages and self._check_nala_available(): + self.logger.info( + f"Using nala for accelerated installation of system packages: {large_packages}" ) + return self._install_system_with_nala(packages) + else: + return self._install_system_standard(packages) - def install_dependencies(self, packages: List[str]) -> FunctionResponse: + def install_dependencies( + self, packages: List[str], accelerate_downloads: bool = True + ) -> FunctionResponse: """ - Install Python packages using uv with differential installation support. + Install Python packages using uv (accelerated) or pip (standard). Args: packages: List of package names or package specifications + accelerate_downloads: Whether to use uv for accelerated downloads Returns: FunctionResponse: Object indicating success or failure with details """ @@ -86,37 +67,54 @@ def install_dependencies(self, packages: List[str]) -> FunctionResponse: self.logger.info(f"Installing dependencies: {packages}") - # If using volume, check which packages are already installed - if ( - self.workspace_manager.has_runpod_volume - and self.workspace_manager.venv_path - and os.path.exists(self.workspace_manager.venv_path) - ): - # Validate virtual environment before using it - validation_result = self.workspace_manager._validate_virtual_environment() - if not validation_result.success: - self.logger.warning( - f"Virtual environment is invalid: {validation_result.error}" + # Always use UV for Python package installation (more reliable than pip) + # When acceleration is enabled, use differential installation + if accelerate_downloads: + if ( + self.workspace_manager.has_runpod_volume + and self.workspace_manager.venv_path + and os.path.exists(self.workspace_manager.venv_path) + ): + # Validate virtual environment before using it + validation_result = ( + self.workspace_manager._validate_virtual_environment() ) - self.logger.info("Reinitializing workspace...") - init_result = self.workspace_manager.initialize_workspace() - if not init_result.success: + if not validation_result.success: + self.logger.warning( + f"Virtual environment is invalid: {validation_result.error}" + ) + self.logger.info("Reinitializing workspace...") + init_result = self.workspace_manager.initialize_workspace() + if not init_result.success: + return FunctionResponse( + success=False, + error=f"Failed to reinitialize workspace: {init_result.error}", + ) + installed_packages = self._get_installed_packages() + packages_to_install = self._filter_packages_to_install( + packages, installed_packages + ) + + if not packages_to_install: return FunctionResponse( - success=False, - error=f"Failed to reinitialize workspace: {init_result.error}", + success=True, stdout="All packages already installed" ) - installed_packages = self._get_installed_packages() - packages_to_install = self._filter_packages_to_install( - packages, installed_packages - ) - if not packages_to_install: - return FunctionResponse( - success=True, stdout="All packages already installed" - ) + packages = packages_to_install - packages = packages_to_install + # Always use UV (works reliably with virtual environments) + return self._install_with_uv(packages) + def _install_with_uv(self, packages: List[str]) -> FunctionResponse: + """ + Install packages using UV package manager + + Args: + packages: Packages to install + + Returns: + FunctionResponse with installation result + """ try: # Prepare environment for virtual environment usage env = os.environ.copy() @@ -127,7 +125,7 @@ def install_dependencies(self, packages: List[str]) -> FunctionResponse: env["VIRTUAL_ENV"] = self.workspace_manager.venv_path # Use uv pip to install the packages - command = ["uv", "pip", "install", "--no-cache-dir"] + packages + command = ["uv", "pip", "install"] + packages process = subprocess.Popen( command, stdout=subprocess.PIPE, @@ -211,3 +209,201 @@ def _filter_packages_to_install( packages_to_install.append(package) return packages_to_install + + def _check_nala_available(self) -> bool: + """ + Check if nala is available and cache the result. + + Returns: + True if nala is available, False otherwise + """ + if self._nala_available is None: + try: + process = subprocess.Popen( + NALA_CHECK_CMD, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + process.communicate() + self._nala_available = process.returncode == 0 + + if self._nala_available: + self.logger.debug( + "nala is available for accelerated system package installation" + ) + else: + self.logger.debug("nala is not available, falling back to apt-get") + + except Exception: + self._nala_available = False + self.logger.debug( + "nala availability check failed, falling back to apt-get" + ) + + return self._nala_available + + def _identify_large_system_packages(self, packages: List[str]) -> List[str]: + """ + Identify system packages that are likely to be large and benefit from acceleration. + + Args: + packages: List of system package names + + Returns: + List of package names that are likely large + """ + large_packages = [] + for package in packages: + if any(pattern in package for pattern in LARGE_SYSTEM_PACKAGES): + large_packages.append(package) + return large_packages + + def _install_system_with_nala(self, packages: List[str]) -> FunctionResponse: + """ + Install system packages using nala for accelerated downloads. + + Args: + packages: System packages to install + + Returns: + FunctionResponse with installation result + """ + try: + # Update package list first with nala + self.logger.info("Updating package list with nala") + update_process = subprocess.Popen( + ["nala", "update"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + update_stdout, update_stderr = update_process.communicate() + + if update_process.returncode != 0: + self.logger.warning( + "nala update failed, falling back to standard installation" + ) + return self._install_system_standard(packages) + + # Install packages with nala + self.logger.info("Installing packages with nala acceleration") + process = subprocess.Popen( + ["nala", "install", "-y"] + packages, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env={ + **os.environ, + "DEBIAN_FRONTEND": "noninteractive", + }, + ) + + stdout, stderr = process.communicate() + + if process.returncode != 0: + self.logger.warning( + "nala installation failed, falling back to standard installation" + ) + return self._install_system_standard(packages) + else: + self.logger.info( + f"Successfully installed system packages with nala: {packages}" + ) + return FunctionResponse( + success=True, + stdout=f"Installed with nala acceleration: {stdout.decode()}", + ) + except Exception as e: + self.logger.warning( + f"nala installation failed with exception, falling back to standard: {e}" + ) + return self._install_system_standard(packages) + + def _install_system_standard(self, packages: List[str]) -> FunctionResponse: + """ + Install system packages using standard apt-get method. + + Args: + packages: System packages to install + + Returns: + FunctionResponse with installation result + """ + try: + # Update package list first + update_process = subprocess.Popen( + ["apt-get", "update"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + update_stdout, update_stderr = update_process.communicate() + + if update_process.returncode != 0: + return FunctionResponse( + success=False, + error="Error updating package list", + stdout=update_stderr.decode(), + ) + + # Install the packages + process = subprocess.Popen( + ["apt-get", "install", "-y", "--no-install-recommends"] + packages, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env={ + **os.environ, + "DEBIAN_FRONTEND": "noninteractive", + }, + ) + + stdout, stderr = process.communicate() + + if process.returncode != 0: + return FunctionResponse( + success=False, + error="Error installing system packages", + stdout=stderr.decode(), + ) + else: + self.logger.info(f"Successfully installed system packages: {packages}") + return FunctionResponse( + success=True, + stdout=stdout.decode(), + ) + except Exception as e: + return FunctionResponse( + success=False, + error=f"Exception during system package installation: {e}", + ) + + async def install_system_dependencies_async( + self, packages: List[str], accelerate_downloads: bool = True + ) -> FunctionResponse: + """ + Async wrapper for system dependency installation. + + Args: + packages: List of system package names + accelerate_downloads: Whether to use nala for accelerated downloads + + Returns: + FunctionResponse: Object indicating success or failure with details + """ + return await asyncio.to_thread( + self.install_system_dependencies, packages, accelerate_downloads + ) + + async def install_dependencies_async( + self, packages: List[str], accelerate_downloads: bool = True + ) -> FunctionResponse: + """ + Async wrapper for Python dependency installation. + + Args: + packages: List of package names or package specifications + accelerate_downloads: Whether to use uv for accelerated downloads + + Returns: + FunctionResponse: Object indicating success or failure with details + """ + return await asyncio.to_thread( + self.install_dependencies, packages, accelerate_downloads + ) diff --git a/src/download_accelerator.py b/src/download_accelerator.py new file mode 100644 index 0000000..9f59385 --- /dev/null +++ b/src/download_accelerator.py @@ -0,0 +1,266 @@ +""" +Download acceleration using hf_transfer for optimal HuggingFace model downloads. + +This module provides accelerated download capabilities optimized for HuggingFace models: +- hf_transfer for accelerated downloads when available +- hf_xet acceleration is automatically handled by HuggingFace Hub (huggingface_hub>=0.32.0) +- Standard HF hub as reliable fallback +""" + +import os +import time +import logging +from dataclasses import dataclass +from typing import Optional + +from remote_execution import FunctionResponse +from constants import ( + MIN_SIZE_FOR_ACCELERATION_MB, + HF_TRANSFER_ENABLED, +) + + +@dataclass +class DownloadMetrics: + """Performance metrics for download operations.""" + + method: str + file_size_bytes: int + total_time_seconds: float + average_speed_mbps: float + success: bool + error_message: Optional[str] = None + + @property + def speed_mb_per_sec(self) -> float: + """Convert to MB/s for easier reading.""" + return self.average_speed_mbps / 8.0 + + @property + def file_size_mb(self) -> float: + """File size in megabytes.""" + return self.file_size_bytes / (1024 * 1024) + + +class HfTransferDownloader: + """HuggingFace Transfer downloader for fresh downloads.""" + + def __init__(self): + self.logger = logging.getLogger(__name__) + self.hf_transfer_available = self._check_hf_transfer() + + def _check_hf_transfer(self) -> bool: + """Check if hf_transfer is available.""" + import importlib.util + + if importlib.util.find_spec("hf_transfer") is not None: + return HF_TRANSFER_ENABLED + else: + self.logger.debug("hf_transfer not available") + return False + + def download( + self, + url: str, + output_path: str, + show_progress: bool = False, + ) -> DownloadMetrics: + """ + Download file using hf_transfer for maximum speed. + + Args: + url: URL to download + output_path: Local file path to save to + show_progress: Whether to show real-time progress + + Returns: + DownloadMetrics with performance data + """ + if not self.hf_transfer_available: + raise RuntimeError("hf_transfer not available") + + start_time = time.time() + + try: + # Set HF_HUB_ENABLE_HF_TRANSFER environment variable + env = os.environ.copy() + env["HF_HUB_ENABLE_HF_TRANSFER"] = "1" + + # Add authentication if HF token is available + hf_token = os.environ.get("HF_TOKEN") + if hf_token: + env["HF_TOKEN"] = hf_token + + # Use hf_transfer via huggingface_hub + from huggingface_hub import hf_hub_download + + # Extract model_id and filename from URL + # URL format: https://huggingface.co/{model_id}/resolve/{revision}/{filename} + if "huggingface.co" in url and "/resolve/" in url: + parts = url.replace("https://huggingface.co/", "").split("/resolve/") + model_id = parts[0] + revision_and_filename = parts[1].split("/", 1) + revision = revision_and_filename[0] + filename = revision_and_filename[1] + + # Create output directory + os.makedirs(os.path.dirname(output_path), exist_ok=True) + + # Download using hf_hub_download with hf_transfer enabled + downloaded_path = hf_hub_download( + repo_id=model_id, + filename=filename, + revision=revision, + cache_dir=os.path.dirname(output_path), + local_dir=os.path.dirname(output_path), + local_dir_use_symlinks=False, + ) + + # Move to expected location if needed + if downloaded_path != output_path: + import shutil + + shutil.move(downloaded_path, output_path) + + else: + # Fallback to direct download for non-HF URLs + raise ValueError("hf_transfer only supports HuggingFace URLs") + + end_time = time.time() + file_size = ( + os.path.getsize(output_path) if os.path.exists(output_path) else 0 + ) + total_time = end_time - start_time + + if total_time > 0 and file_size > 0: + bits_per_second = (file_size * 8) / total_time + avg_speed = bits_per_second / (1024 * 1024) + else: + avg_speed = 0 + + self.logger.info( + f"Downloaded {file_size / (1024 * 1024):.1f}MB in {total_time:.1f}s " + f"({avg_speed / 8:.1f} MB/s) using hf_transfer" + ) + + return DownloadMetrics( + method="hf_transfer", + file_size_bytes=file_size, + total_time_seconds=total_time, + average_speed_mbps=avg_speed, + success=True, + ) + + except Exception as e: + self.logger.error(f"hf_transfer download failed: {str(e)}") + return DownloadMetrics( + method="hf_transfer", + file_size_bytes=0, + total_time_seconds=time.time() - start_time, + average_speed_mbps=0, + success=False, + error_message=str(e), + ) + + +class DownloadAccelerator: + """ + Main download acceleration coordinator using hf_transfer. + + Note: hf_xet acceleration is now automatically handled by HuggingFace Hub + when using hf_hub_download() or snapshot_download() functions. + """ + + def __init__(self, workspace_manager=None): + self.workspace_manager = workspace_manager + self.logger = logging.getLogger(__name__) + self.hf_transfer_downloader = HfTransferDownloader() + + def should_accelerate_download( + self, url: str, estimated_size_mb: float = 0 + ) -> bool: + """ + Determine if download should be accelerated. + + Args: + url: Download URL + estimated_size_mb: Estimated file size in MB + + Returns: + True if download should be accelerated + """ + # Only accelerate HuggingFace downloads with our new methods + if "huggingface.co" not in url: + return False + + if estimated_size_mb >= MIN_SIZE_FOR_ACCELERATION_MB: + return True + + # For HuggingFace URLs, always try acceleration + return True + + def is_file_cached(self, output_path: str) -> bool: + """Check if file is already cached locally.""" + return os.path.exists(output_path) and os.path.getsize(output_path) > 0 + + def download_with_fallback( + self, + url: str, + output_path: str, + estimated_size_mb: float = 0, + show_progress: bool = False, + ) -> FunctionResponse: + """ + Download with HF optimization when applicable. + + Strategy: + 1. Use hf_transfer for HF URLs when available and size warrants acceleration + 2. Otherwise return failure - let HF's native download handling work + + Args: + url: URL to download + output_path: Local file path + estimated_size_mb: Estimated size for acceleration decision + show_progress: Whether to show progress + + Returns: + FunctionResponse with download result + """ + if not self.should_accelerate_download(url, estimated_size_mb): + self.logger.info( + f"Not accelerating download, letting HF handle natively: {url}" + ) + return FunctionResponse( + success=False, + error="No acceleration available - defer to HF native handling", + ) + + # Strategy 1: Try hf_transfer (hf_xet is automatically used by HF Hub when available) + if self.hf_transfer_downloader.hf_transfer_available: + try: + self.logger.info(f"Using hf_transfer for download: {url}") + metrics = self.hf_transfer_downloader.download( + url, output_path, show_progress=show_progress + ) + + if metrics.success: + return FunctionResponse( + success=True, + stdout=f"Downloaded {metrics.file_size_mb:.1f}MB in {metrics.total_time_seconds:.1f}s " + f"({metrics.speed_mb_per_sec:.1f} MB/s) using hf_transfer", + ) + else: + self.logger.warning( + f"hf_transfer download failed: {metrics.error_message}" + ) + except Exception as e: + self.logger.warning(f"hf_transfer download failed: {e}") + + # No acceleration available - let HF handle natively + self.logger.info( + f"No acceleration available for {url}, deferring to HF native handling" + ) + return FunctionResponse( + success=False, + error="Acceleration not available - defer to HF native handling", + ) diff --git a/src/handler.py b/src/handler.py index 31893a3..0cd0903 100644 --- a/src/handler.py +++ b/src/handler.py @@ -1,19 +1,21 @@ import runpod import logging import sys +from typing import Dict, Any from remote_execution import FunctionRequest, FunctionResponse from remote_executor import RemoteExecutor +from constants import LOG_FORMAT logging.basicConfig( level=logging.DEBUG, # or INFO for less verbose output stream=sys.stdout, # send logs to stdout (so docker captures it) - format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + format=LOG_FORMAT, ) -async def handler(event: dict) -> dict: +async def handler(event: Dict[str, Any]) -> Dict[str, Any]: """ RunPod serverless function handler with dependency installation. """ diff --git a/src/hf_download_strategy.py b/src/hf_download_strategy.py new file mode 100644 index 0000000..d8e1df0 --- /dev/null +++ b/src/hf_download_strategy.py @@ -0,0 +1,81 @@ +""" +HuggingFace download strategy interface. + +Provides pluggable download strategies for HuggingFace models to allow +switching between different acceleration methods and benchmarking performance. +""" + +from abc import ABC, abstractmethod +from typing import Dict, Any +from remote_execution import FunctionResponse + + +class HFDownloadStrategy(ABC): + """Abstract base class for HuggingFace download strategies.""" + + @abstractmethod + def download_model(self, model_id: str, revision: str = "main") -> FunctionResponse: + """ + Download a HuggingFace model. + + Args: + model_id: HuggingFace model identifier + revision: Model revision/branch + + Returns: + FunctionResponse with download results + """ + pass + + @abstractmethod + def is_model_cached(self, model_id: str, revision: str = "main") -> bool: + """ + Check if model is already cached. + + Args: + model_id: HuggingFace model identifier + revision: Model revision/branch + + Returns: + True if model appears to be cached + """ + pass + + @abstractmethod + def get_cache_info(self, model_id: str) -> Dict[str, Any]: + """ + Get cache information for a model. + + Args: + model_id: HuggingFace model identifier + + Returns: + Dictionary with cache information + """ + pass + + @abstractmethod + def should_accelerate(self, model_id: str) -> bool: + """ + Determine if model should use acceleration. + + Args: + model_id: HuggingFace model identifier + + Returns: + True if acceleration should be used + """ + pass + + @abstractmethod + def clear_model_cache(self, model_id: str) -> FunctionResponse: + """ + Clear cache for a specific model. + + Args: + model_id: HuggingFace model identifier + + Returns: + FunctionResponse with clearing result + """ + pass diff --git a/src/hf_downloader_native.py b/src/hf_downloader_native.py new file mode 100644 index 0000000..4e1f630 --- /dev/null +++ b/src/hf_downloader_native.py @@ -0,0 +1,175 @@ +""" +Native HuggingFace downloader strategy. + +This strategy implements the current simplified approach using HF Hub's +native snapshot_download() with built-in acceleration support. +""" + +import logging +from typing import Dict, Any + +from huggingface_hub import HfApi, snapshot_download +from remote_execution import FunctionResponse +from hf_download_strategy import HFDownloadStrategy +from constants import LARGE_HF_MODEL_PATTERNS, BYTES_PER_MB + + +class NativeHFDownloader(HFDownloadStrategy): + """Native HuggingFace downloader using HF Hub's built-in acceleration.""" + + def __init__(self, workspace_manager): + self.workspace_manager = workspace_manager + self.logger = logging.getLogger(__name__) + self.api = HfApi() + + # HF will automatically use HF_HOME environment variable set by workspace_manager + # No need to manually manage cache directories + + def should_accelerate(self, model_id: str) -> bool: + """ + Determine if model should be pre-cached. + HF Hub automatically uses hf_transfer when available. + + Args: + model_id: HuggingFace model identifier + + Returns: + True if model should be pre-cached + """ + model_lower = model_id.lower() + return any(pattern in model_lower for pattern in LARGE_HF_MODEL_PATTERNS) + + def download_model(self, model_id: str, revision: str = "main") -> FunctionResponse: + """ + Pre-download HuggingFace model using HF Hub's native caching. + + This method downloads the complete model snapshot to HF's standard cache + location, leveraging hf_transfer when available. + + Args: + model_id: HuggingFace model identifier + revision: Model revision/branch + + Returns: + FunctionResponse with download results + """ + if not self.should_accelerate(model_id): + return FunctionResponse( + success=True, stdout=f"Model {model_id} does not require pre-caching" + ) + + self.logger.info(f"Pre-caching model: {model_id}") + + try: + # Use HF Hub's native snapshot download with acceleration + snapshot_path = snapshot_download( + repo_id=model_id, + revision=revision, + # HF automatically uses HF_HOME/HF_HUB_CACHE from environment + # and applies hf_transfer acceleration when available + ) + + return FunctionResponse( + success=True, + stdout=f"Successfully pre-cached model {model_id} to {snapshot_path}", + ) + + except Exception as e: + return FunctionResponse( + success=False, + error=f"Failed to pre-cache model {model_id}: {str(e)}", + ) + + def is_model_cached(self, model_id: str, revision: str = "main") -> bool: + """ + Check if model is already cached using HF Hub's cache utilities. + + Args: + model_id: HuggingFace model identifier + revision: Model revision/branch + + Returns: + True if model appears to be cached + """ + try: + from huggingface_hub import try_to_load_from_cache + + # Check for common model files that indicate a cached model + key_files = ["config.json", "pytorch_model.bin", "model.safetensors"] + + for filename in key_files: + cached_path = try_to_load_from_cache( + repo_id=model_id, filename=filename, revision=revision + ) + if cached_path is not None: # Found cached file + return True + + return False + except Exception: + return False + + def get_cache_info(self, model_id: str) -> Dict[str, Any]: + """ + Get cache information for a model using HF Hub utilities. + + Args: + model_id: HuggingFace model identifier + + Returns: + Dictionary with cache information + """ + try: + from huggingface_hub import scan_cache_dir + + cache_info = scan_cache_dir() + + # Find our specific model in the cache + for repo in cache_info.repos: + if repo.repo_id == model_id: + return { + "cached": True, + "cache_size_mb": repo.size_on_disk / BYTES_PER_MB, + "file_count": len(list(repo.revisions)[0].files) + if repo.revisions + else 0, + "cache_path": str(repo.repo_path), + } + + return {"cached": False, "cache_size_mb": 0, "file_count": 0} + + except Exception: + return {"cached": False, "cache_size_mb": 0, "file_count": 0} + + def clear_model_cache(self, model_id: str) -> FunctionResponse: + """ + Clear cache for a specific model using HF Hub utilities. + + Args: + model_id: HuggingFace model identifier + + Returns: + FunctionResponse with clearing result + """ + try: + from huggingface_hub import scan_cache_dir + + cache_info = scan_cache_dir() + + # Find and delete our specific model + for repo in cache_info.repos: + if repo.repo_id == model_id: + delete_strategy = cache_info.delete_revisions(repo.repo_id) + delete_strategy.execute() + + return FunctionResponse( + success=True, stdout=f"Cleared cache for model {model_id}" + ) + + return FunctionResponse( + success=True, stdout=f"No cache found for model {model_id}" + ) + + except Exception as e: + return FunctionResponse( + success=False, error=f"Failed to clear cache for {model_id}: {str(e)}" + ) diff --git a/src/hf_downloader_tetra.py b/src/hf_downloader_tetra.py new file mode 100644 index 0000000..d9fa6ab --- /dev/null +++ b/src/hf_downloader_tetra.py @@ -0,0 +1,270 @@ +""" +Tetra HuggingFace downloader strategy. + +This strategy implements a custom acceleration logic with +manual file enumeration and file-by-file downloads using +hf_transfer and custom acceleration methods. +""" + +import logging +from typing import Dict, List, Any +from pathlib import Path + +from huggingface_hub import HfApi +from remote_execution import FunctionResponse +from hf_download_strategy import HFDownloadStrategy +from download_accelerator import DownloadAccelerator +from constants import LARGE_HF_MODEL_PATTERNS, BYTES_PER_MB, MB_SIZE_THRESHOLD + + +class TetraHFDownloader(HFDownloadStrategy): + """Custom Tetra HuggingFace downloader with manual acceleration logic.""" + + def __init__(self, workspace_manager): + self.workspace_manager = workspace_manager + self.logger = logging.getLogger(__name__) + self.download_accelerator = DownloadAccelerator(workspace_manager) + self.api = HfApi() + + # Use workspace manager's HF cache if available + if workspace_manager and workspace_manager.hf_cache_path: + self.cache_dir = Path(workspace_manager.hf_cache_path) + else: + self.cache_dir = Path.home() / ".cache" / "huggingface" + + self.cache_dir.mkdir(parents=True, exist_ok=True) + + def get_model_files( + self, model_id: str, revision: str = "main" + ) -> List[Dict[str, Any]]: + """ + Get list of files for a HuggingFace model using the HF Hub API. + + Args: + model_id: HuggingFace model identifier (e.g., 'gpt2', 'microsoft/DialoGPT-medium') + revision: Model revision/branch (default: 'main') + + Returns: + List of file information dictionaries + """ + try: + # Use HF Hub's native API instead of manual requests + repo_info = self.api.repo_info(model_id, revision=revision) + + files = [] + if repo_info.siblings: + for sibling in repo_info.siblings: + if sibling.rfilename: # Only include actual files + files.append( + { + "path": sibling.rfilename, + "size": getattr(sibling, "size", 0) or 0, + "url": f"https://huggingface.co/{model_id}/resolve/{revision}/{sibling.rfilename}", + } + ) + + return files + + except Exception as e: + self.logger.warning(f"Could not fetch model file list for {model_id}: {e}") + return [] + + def should_accelerate(self, model_id: str) -> bool: + """ + Determine if model downloads should be accelerated. + + Args: + model_id: HuggingFace model identifier + + Returns: + True if acceleration should be used + """ + # Check if hf_transfer is available + has_hf_transfer = ( + self.download_accelerator.hf_transfer_downloader.hf_transfer_available + ) + + if not has_hf_transfer: + return False + + model_lower = model_id.lower() + return any(pattern in model_lower for pattern in LARGE_HF_MODEL_PATTERNS) + + def download_model(self, model_id: str, revision: str = "main") -> FunctionResponse: + """ + Download HuggingFace model files using Tetra's custom acceleration. + + This method downloads model files to the cache before transformers tries to access them, + using hf_transfer or custom acceleration for optimized downloads. + + Args: + model_id: HuggingFace model identifier + revision: Model revision/branch + + Returns: + FunctionResponse with download results + """ + if not self.should_accelerate(model_id): + return FunctionResponse( + success=True, stdout=f"Model {model_id} does not require acceleration" + ) + + self.logger.info(f"Accelerating model download: {model_id}") + + # Get model file list + files = self.get_model_files(model_id, revision) + if not files: + return FunctionResponse( + success=False, error=f"Could not get file list for model {model_id}" + ) + + # Filter for main model files (ignore small config files) + large_files = [f for f in files if f["size"] > MB_SIZE_THRESHOLD] + + if not large_files: + return FunctionResponse( + success=True, stdout=f"No large files found for model {model_id}" + ) + + self.logger.info( + f"Found {len(large_files)} large files to download for {model_id}" + ) + + # Create model-specific cache directory + model_cache_dir = self.cache_dir / "transformers" / model_id.replace("/", "--") + model_cache_dir.mkdir(parents=True, exist_ok=True) + + successful_downloads = 0 + total_size = sum(f["size"] for f in large_files) + + for file_info in large_files: + file_path = model_cache_dir / file_info["path"] + file_path.parent.mkdir(parents=True, exist_ok=True) + + # Skip if file already exists and is correct size + if file_path.exists() and file_path.stat().st_size == file_info["size"]: + self.logger.info(f"✓ {file_info['path']} (cached)") + successful_downloads += 1 + continue + + try: + file_size_mb = file_info["size"] / BYTES_PER_MB + self.logger.info( + f"Downloading {file_info['path']} ({file_size_mb:.1f}MB)..." + ) + + # Use download accelerator + result = self.download_accelerator.download_with_fallback( + file_info["url"], + str(file_path), + estimated_size_mb=file_size_mb, + show_progress=True, + ) + + if result.success: + successful_downloads += 1 + self.logger.info(f"✓ {file_info['path']} downloaded successfully") + else: + self.logger.error(f"✗ {file_info['path']} failed: {result.error}") + + except Exception as e: + self.logger.error( + f"✗ {file_info['path']} failed with exception: {str(e)}" + ) + + success = successful_downloads == len(large_files) + + if success: + return FunctionResponse( + success=True, + stdout=f"Successfully pre-downloaded {successful_downloads} files " + f"({total_size / BYTES_PER_MB:.1f}MB) for model {model_id}", + ) + else: + return FunctionResponse( + success=False, + error=f"Failed to download {len(large_files) - successful_downloads} files for {model_id}", + stdout=f"Downloaded {successful_downloads}/{len(large_files)} files", + ) + + def is_model_cached(self, model_id: str, revision: str = "main") -> bool: + """ + Check if model is already cached. + + Args: + model_id: HuggingFace model identifier + revision: Model revision/branch + + Returns: + True if model appears to be cached + """ + model_cache_dir = self.cache_dir / "transformers" / model_id.replace("/", "--") + + if not model_cache_dir.exists(): + return False + + # Check if there are any model files + model_files = list(model_cache_dir.glob("**/*.bin")) + list( + model_cache_dir.glob("**/*.safetensors") + ) + return len(model_files) > 0 + + def get_cache_info(self, model_id: str) -> Dict[str, Any]: + """ + Get cache information for a model. + + Args: + model_id: HuggingFace model identifier + + Returns: + Dictionary with cache information + """ + model_cache_dir = self.cache_dir / "transformers" / model_id.replace("/", "--") + + if not model_cache_dir.exists(): + return {"cached": False, "cache_size_mb": 0, "file_count": 0} + + total_size = 0 + file_count = 0 + + for file_path in model_cache_dir.rglob("*"): + if file_path.is_file(): + total_size += file_path.stat().st_size + file_count += 1 + + return { + "cached": file_count > 0, + "cache_size_mb": total_size / BYTES_PER_MB, + "file_count": file_count, + "cache_path": str(model_cache_dir), + } + + def clear_model_cache(self, model_id: str) -> FunctionResponse: + """ + Clear cache for a specific model. + + Args: + model_id: HuggingFace model identifier + + Returns: + FunctionResponse with clearing result + """ + model_cache_dir = self.cache_dir / "transformers" / model_id.replace("/", "--") + + if not model_cache_dir.exists(): + return FunctionResponse( + success=True, stdout=f"No cache found for model {model_id}" + ) + + try: + import shutil + + shutil.rmtree(model_cache_dir) + + return FunctionResponse( + success=True, stdout=f"Cleared cache for model {model_id}" + ) + except Exception as e: + return FunctionResponse( + success=False, error=f"Failed to clear cache for {model_id}: {str(e)}" + ) diff --git a/src/hf_strategy_factory.py b/src/hf_strategy_factory.py new file mode 100644 index 0000000..1ce81de --- /dev/null +++ b/src/hf_strategy_factory.py @@ -0,0 +1,119 @@ +""" +HuggingFace download strategy factory. + +Provides configuration system for switching between different HF download strategies +and creating the appropriate downloader instance based on environment variables. +""" + +import os +import logging +from typing import Optional, Dict, Any + +from hf_download_strategy import HFDownloadStrategy +from hf_downloader_tetra import TetraHFDownloader +from hf_downloader_native import NativeHFDownloader + + +class HFStrategyFactory: + """Factory for creating HF download strategy instances.""" + + # Environment variable name + STRATEGY_ENV_VAR = "HF_DOWNLOAD_STRATEGY" + + # Available strategy names + TETRA_STRATEGY = "tetra" + NATIVE_STRATEGY = "native" + + # Default strategy + DEFAULT_STRATEGY = TETRA_STRATEGY + + @classmethod + def get_available_strategies(cls) -> list[str]: + """Get list of available strategy names.""" + return [cls.TETRA_STRATEGY, cls.NATIVE_STRATEGY] + + @classmethod + def get_configured_strategy(cls) -> str: + """ + Get the configured strategy name from environment variables. + + Returns: + Strategy name (defaults to native if not configured) + """ + strategy = os.environ.get(cls.STRATEGY_ENV_VAR, cls.DEFAULT_STRATEGY).lower() + + # Validate strategy + if strategy not in cls.get_available_strategies(): + logger = logging.getLogger(__name__) + logger.warning( + f"Unknown HF download strategy '{strategy}', falling back to '{cls.DEFAULT_STRATEGY}'" + ) + return cls.DEFAULT_STRATEGY + + return strategy + + @classmethod + def create_strategy( + cls, workspace_manager, strategy: Optional[str] = None + ) -> HFDownloadStrategy: + """ + Create HF download strategy instance. + + Args: + workspace_manager: Workspace manager instance + strategy: Optional strategy override (defaults to environment configuration) + + Returns: + HFDownloadStrategy instance + """ + if strategy is None: + strategy = cls.get_configured_strategy() + + logger = logging.getLogger(__name__) + logger.info(f"Creating HF download strategy: {strategy}") + + if strategy == cls.TETRA_STRATEGY: + return TetraHFDownloader(workspace_manager) + elif strategy == cls.NATIVE_STRATEGY: + return NativeHFDownloader(workspace_manager) + else: + # Fallback to native + logger.warning(f"Unknown strategy '{strategy}', using native") + return NativeHFDownloader(workspace_manager) + + @classmethod + def set_strategy(cls, strategy: str) -> None: + """ + Set the HF download strategy via environment variable. + + Args: + strategy: Strategy name to set + """ + if strategy not in cls.get_available_strategies(): + raise ValueError( + f"Invalid strategy '{strategy}'. Available: {cls.get_available_strategies()}" + ) + + os.environ[cls.STRATEGY_ENV_VAR] = strategy + + logger = logging.getLogger(__name__) + logger.info(f"Set HF download strategy to: {strategy}") + + @classmethod + def get_strategy_info(cls) -> Dict[str, Any]: + """ + Get information about the current strategy configuration. + + Returns: + Dictionary with strategy configuration info + """ + current_strategy = cls.get_configured_strategy() + env_value = os.environ.get(cls.STRATEGY_ENV_VAR, "not set") + + return { + "current_strategy": current_strategy, + "environment_variable": cls.STRATEGY_ENV_VAR, + "environment_value": env_value, + "default_strategy": cls.DEFAULT_STRATEGY, + "available_strategies": cls.get_available_strategies(), + } diff --git a/src/huggingface_accelerator.py b/src/huggingface_accelerator.py new file mode 100644 index 0000000..2f2b2ad --- /dev/null +++ b/src/huggingface_accelerator.py @@ -0,0 +1,150 @@ +""" +HuggingFace model download acceleration. + +This module provides accelerated downloads for HuggingFace models and datasets, +integrating with the existing volume workspace caching system using pluggable +download strategies. +""" + +import logging +from typing import Dict, List, Any + +from huggingface_hub import HfApi +from remote_execution import FunctionResponse +from hf_strategy_factory import HFStrategyFactory +from hf_download_strategy import HFDownloadStrategy + + +class HuggingFaceAccelerator: + """Accelerated downloads for HuggingFace models and files using pluggable strategies.""" + + def __init__(self, workspace_manager): + self.workspace_manager = workspace_manager + self.logger = logging.getLogger(__name__) + self.api = HfApi() + + # Create the configured download strategy + self.strategy: HFDownloadStrategy = HFStrategyFactory.create_strategy( + workspace_manager + ) + + def get_model_files( + self, model_id: str, revision: str = "main" + ) -> List[Dict[str, Any]]: + """ + Get list of files for a HuggingFace model using the HF Hub API. + + Args: + model_id: HuggingFace model identifier (e.g., 'gpt2', 'microsoft/DialoGPT-medium') + revision: Model revision/branch (default: 'main') + + Returns: + List of file information dictionaries + """ + try: + # Use HF Hub's native API instead of manual requests + repo_info = self.api.repo_info(model_id, revision=revision) + + files = [] + if repo_info.siblings: + for sibling in repo_info.siblings: + if sibling.rfilename: # Only include actual files + files.append( + { + "path": sibling.rfilename, + "size": getattr(sibling, "size", 0) or 0, + "url": f"https://huggingface.co/{model_id}/resolve/{revision}/{sibling.rfilename}", + } + ) + + return files + + except Exception as e: + self.logger.warning(f"Could not fetch model file list for {model_id}: {e}") + return [] + + def should_accelerate_model(self, model_id: str) -> bool: + """ + Determine if model should be pre-cached using the configured strategy. + + Args: + model_id: HuggingFace model identifier + + Returns: + True if model should be pre-cached + """ + return self.strategy.should_accelerate(model_id) + + def accelerate_model_download( + self, model_id: str, revision: str = "main" + ) -> FunctionResponse: + """ + Pre-download HuggingFace model using the configured download strategy. + + Args: + model_id: HuggingFace model identifier + revision: Model revision/branch + + Returns: + FunctionResponse with download results + """ + return self.strategy.download_model(model_id, revision) + + def is_model_cached(self, model_id: str, revision: str = "main") -> bool: + """ + Check if model is already cached using the configured strategy. + + Args: + model_id: HuggingFace model identifier + revision: Model revision/branch + + Returns: + True if model appears to be cached + """ + return self.strategy.is_model_cached(model_id, revision) + + def get_cache_info(self, model_id: str) -> Dict[str, Any]: + """ + Get cache information for a model using the configured strategy. + + Args: + model_id: HuggingFace model identifier + + Returns: + Dictionary with cache information + """ + return self.strategy.get_cache_info(model_id) + + def clear_model_cache(self, model_id: str) -> FunctionResponse: + """ + Clear cache for a specific model using the configured strategy. + + Args: + model_id: HuggingFace model identifier + + Returns: + FunctionResponse with clearing result + """ + return self.strategy.clear_model_cache(model_id) + + def get_strategy_info(self) -> Dict[str, Any]: + """ + Get information about the current download strategy. + + Returns: + Dictionary with strategy information + """ + strategy_info = HFStrategyFactory.get_strategy_info() + strategy_info["strategy_instance"] = type(self.strategy).__name__ + return strategy_info + + def set_strategy(self, strategy: str) -> None: + """ + Change the download strategy (creates new strategy instance). + + Args: + strategy: Strategy name ("tetra" or "native") + """ + HFStrategyFactory.set_strategy(strategy) + self.strategy = HFStrategyFactory.create_strategy(self.workspace_manager) + self.logger.info(f"Switched to {strategy} download strategy") diff --git a/src/remote_executor.py b/src/remote_executor.py index 0e1ac90..043aba0 100644 --- a/src/remote_executor.py +++ b/src/remote_executor.py @@ -1,4 +1,6 @@ import logging +import asyncio +from typing import List, Any from remote_execution import FunctionRequest, FunctionResponse, RemoteExecutorStub from workspace_manager import WorkspaceManager from dependency_installer import DependencyInstaller @@ -40,27 +42,249 @@ async def ExecuteFunction(self, request: FunctionRequest) -> FunctionResponse: if workspace_init.stdout: self.logger.info(workspace_init.stdout) + # Install dependencies and cache models + if request.accelerate_downloads: + # Run installations in parallel when acceleration is enabled + dep_result = await self._install_dependencies_parallel(request) + if not dep_result.success: + return dep_result + else: + # Sequential installation when acceleration is disabled + dep_result = await self._install_dependencies_sequential(request) + if not dep_result.success: + return dep_result + + # Route to appropriate execution method based on type + execution_type = getattr(request, "execution_type", "function") + + # Execute the function/class + if execution_type == "class": + result = self.class_executor.execute_class_method(request) + else: + result = self.function_executor.execute(request) + + # Add acceleration summary to the result + self._log_acceleration_summary(request, result) + + return result + + def _log_acceleration_summary( + self, request: FunctionRequest, result: FunctionResponse + ): + """Log acceleration impact summary for performance visibility.""" + if not hasattr(self.dependency_installer, "download_accelerator"): + return + + acceleration_enabled = request.accelerate_downloads + has_volume = self.workspace_manager.has_runpod_volume + hf_transfer_available = self.dependency_installer.download_accelerator.hf_transfer_downloader.hf_transfer_available + nala_available = self.dependency_installer._check_nala_available() + + # Build summary message + summary_parts = [] + + if acceleration_enabled: + summary_parts.append("✓ Download acceleration ENABLED") + + if has_volume: + summary_parts.append( + f"✓ Volume workspace: {self.workspace_manager.workspace_path}" + ) + summary_parts.append("✓ Persistent caching enabled") + else: + summary_parts.append("ℹ No persistent volume - using temporary cache") + + # System package acceleration status + if request.system_dependencies: + large_system_packages = ( + self.dependency_installer._identify_large_system_packages( + request.system_dependencies + ) + ) + if large_system_packages and nala_available: + summary_parts.append( + f"✓ System packages with nala: {len(large_system_packages)}" + ) + elif request.system_dependencies: + summary_parts.append("→ System packages using standard apt-get") + + if request.hf_models_to_cache: + summary_parts.append( + f"✓ HF models pre-cached: {len(request.hf_models_to_cache)}" + ) + + elif acceleration_enabled and not (hf_transfer_available or nala_available): + summary_parts.append( + "⚠ Download acceleration REQUESTED but no accelerators available" + ) + summary_parts.append("→ Using standard downloads") + + elif not acceleration_enabled: + summary_parts.append("- Download acceleration DISABLED") + summary_parts.append("→ Using standard downloads") + + # Log the summary + if summary_parts: + self.logger.debug("=== DOWNLOAD ACCELERATION SUMMARY ===") + for part in summary_parts: + self.logger.debug(part) + self.logger.debug("=====================================") + + async def _install_dependencies_parallel( + self, request: FunctionRequest + ) -> FunctionResponse: + """ + Install dependencies and cache models in parallel when acceleration is enabled. + + Args: + request: FunctionRequest with dependencies to install + + Returns: + FunctionResponse indicating overall success/failure + """ + tasks = [] + task_names = [] + + # Add system dependencies task + if request.system_dependencies: + task = self.dependency_installer.install_system_dependencies_async( + request.system_dependencies, request.accelerate_downloads + ) + tasks.append(task) + task_names.append("system_dependencies") + + # Add Python dependencies task + if request.dependencies: + task = self.dependency_installer.install_dependencies_async( + request.dependencies, request.accelerate_downloads + ) + tasks.append(task) + task_names.append("python_dependencies") + + # Add HF model caching tasks + if request.hf_models_to_cache: + for model_id in request.hf_models_to_cache: + task = self.workspace_manager.accelerate_model_download_async(model_id) + tasks.append(task) + task_names.append(f"hf_model_{model_id}") + + if not tasks: + return FunctionResponse(success=True, stdout="No dependencies to install") + + self.logger.info( + f"Starting parallel installation of {len(tasks)} tasks: {task_names}" + ) + + # Execute all tasks in parallel + results = await asyncio.gather(*tasks, return_exceptions=True) + + # Process results and handle failures + return self._process_parallel_results(results, task_names) + + async def _install_dependencies_sequential( + self, request: FunctionRequest + ) -> FunctionResponse: + """ + Install dependencies and cache models sequentially when acceleration is disabled. + + Args: + request: FunctionRequest with dependencies to install + + Returns: + FunctionResponse indicating overall success/failure + """ # Install system dependencies first if request.system_dependencies: sys_installed = self.dependency_installer.install_system_dependencies( - request.system_dependencies + request.system_dependencies, request.accelerate_downloads ) if not sys_installed.success: return sys_installed self.logger.info(sys_installed.stdout) + # Pre-cache HuggingFace models if requested (should not happen when acceleration disabled) + if request.accelerate_downloads and request.hf_models_to_cache: + for model_id in request.hf_models_to_cache: + self.logger.info(f"Pre-caching HuggingFace model: {model_id}") + cache_result = self.workspace_manager.accelerate_model_download( + model_id + ) + if cache_result.success: + self.logger.info( + f"Successfully cached model {model_id}: {cache_result.stdout}" + ) + else: + self.logger.warning( + f"Failed to cache model {model_id}: {cache_result.error}" + ) + # Install Python dependencies next if request.dependencies: py_installed = self.dependency_installer.install_dependencies( - request.dependencies + request.dependencies, request.accelerate_downloads ) if not py_installed.success: return py_installed self.logger.info(py_installed.stdout) - # Route to appropriate execution method based on type - execution_type = getattr(request, "execution_type", "function") - if execution_type == "class": - return self.class_executor.execute_class_method(request) + return FunctionResponse( + success=True, stdout="Dependencies installed successfully" + ) + + def _process_parallel_results( + self, results: List[Any], task_names: List[str] + ) -> FunctionResponse: + """ + Process results from parallel dependency installation tasks. + + Args: + results: List of task results (may include exceptions) + task_names: List of task names corresponding to results + + Returns: + FunctionResponse with aggregated results + """ + success_count = 0 + failures = [] + stdout_parts = [] + + for i, result in enumerate(results): + task_name = task_names[i] + + if isinstance(result, Exception): + # Task raised an exception + error_msg = f"{task_name}: Exception - {str(result)}" + failures.append(error_msg) + self.logger.error(error_msg) + elif isinstance(result, FunctionResponse): + if result.success: + success_count += 1 + stdout_parts.append(f"✓ {task_name}: {result.stdout}") + self.logger.info(f"✓ {task_name} completed successfully") + else: + error_msg = f"{task_name}: {result.error}" + failures.append(error_msg) + self.logger.error(f"✗ {task_name} failed: {result.error}") + else: + # Unexpected result type + error_msg = f"{task_name}: Unexpected result type - {type(result)}" + failures.append(error_msg) + self.logger.error(error_msg) + + # Determine overall success + if failures: + # Some tasks failed + error_summary = f"Failed tasks: {'; '.join(failures)}" + return FunctionResponse( + success=False, + error=error_summary, + stdout=f"Parallel installation: {success_count}/{len(results)} tasks succeeded\n" + + "\n".join(stdout_parts), + ) else: - return self.function_executor.execute(request) + # All tasks succeeded + return FunctionResponse( + success=True, + stdout=f"Parallel installation: {success_count}/{len(results)} tasks completed successfully\n" + + "\n".join(stdout_parts), + ) diff --git a/test-handler.sh b/src/test-handler.sh similarity index 100% rename from test-handler.sh rename to src/test-handler.sh diff --git a/src/test_class_custom_method.json b/src/test_class_custom_method.json new file mode 100644 index 0000000..6dc55b3 --- /dev/null +++ b/src/test_class_custom_method.json @@ -0,0 +1,13 @@ +{ + "input": { + "execution_type": "class", + "class_name": "Calculator", + "class_code": "class Calculator:\n def __init__(self, initial_value=0):\n self.value = initial_value\n self.operation_history = []\n \n def add(self, operand):\n old_value = self.value\n self.value += operand\n self.operation_history.append(f'{old_value} + {operand} = {self.value}')\n return self.value\n \n def multiply(self, operand):\n old_value = self.value\n self.value *= operand\n self.operation_history.append(f'{old_value} * {operand} = {self.value}')\n return self.value\n \n def get_history(self):\n return {\n 'current_value': self.value,\n 'operations': self.operation_history,\n 'operation_count': len(self.operation_history)\n }\n \n def reset(self, new_value=0):\n old_value = self.value\n self.value = new_value\n self.operation_history.append(f'Reset from {old_value} to {new_value}')\n return self.value", + "method_name": "multiply", + "constructor_args": [\n "gAWVCgAAAAAAAABHQCQAAAAAAAAu"\n ], + "constructor_kwargs": {}, + "args": [\n "gAWVCgAAAAAAAABHQBQAAAAAAAAu"\n ], + "kwargs": {}, + "create_new_instance": true + } +} \ No newline at end of file diff --git a/test_class_input.json b/src/test_class_input.json similarity index 100% rename from test_class_input.json rename to src/test_class_input.json diff --git a/src/test_class_persistence.json b/src/test_class_persistence.json new file mode 100644 index 0000000..021907c --- /dev/null +++ b/src/test_class_persistence.json @@ -0,0 +1,12 @@ +{ + "input": { + "execution_type": "class", + "class_name": "PersistentCounter", + "class_code": "class PersistentCounter:\n def __init__(self, initial_value=0):\n self.value = initial_value\n self.call_history = []\n \n def increment(self, amount=1):\n self.value += amount\n self.call_history.append(f'incremented by {amount}')\n return self.value\n \n def get_state(self):\n return {\n 'current_value': self.value,\n 'call_count': len(self.call_history),\n 'call_history': self.call_history\n }", + "method_name": "get_state", + "constructor_args": [\n "gAWVCQAAAAAAAACMATWULg=="\n ], + "constructor_kwargs": {}, + "args": [], + "kwargs": {}, + "instance_id": "test_persistent_counter_001", + "create_new_instance": true\n }\n} \ No newline at end of file diff --git a/src/test_error_scenarios.json b/src/test_error_scenarios.json new file mode 100644 index 0000000..c45c3db --- /dev/null +++ b/src/test_error_scenarios.json @@ -0,0 +1,5 @@ +{ + "input": { + "function_name": "test_error_handling", + "function_code": "def test_error_handling():\n import sys\n import traceback\n \n # This function tests that the handler can gracefully handle errors\n # and return proper error information to the client\n \n results = {\n 'controlled_errors': {},\n 'environment_checks': {},\n 'error_handling_test': 'completed'\n }\n \n # Test 1: Controlled exception that should be caught\n try:\n # This will raise a ZeroDivisionError\n result = 10 / 0\n results['controlled_errors']['division_by_zero'] = 'unexpected_success'\n except ZeroDivisionError as e:\n results['controlled_errors']['division_by_zero'] = {\n 'error_type': str(type(e).__name__),\n 'error_message': str(e),\n 'handled_correctly': True\n }\n \n # Test 2: Import error for non-existent module\n try:\n import non_existent_module_xyz123\n results['controlled_errors']['import_error'] = 'unexpected_success'\n except ImportError as e:\n results['controlled_errors']['import_error'] = {\n 'error_type': str(type(e).__name__),\n 'error_message': str(e),\n 'handled_correctly': True\n }\n \n # Test 3: Test that bad dependencies would fail (but we won't actually use bad deps)\n # This test verifies the function can run with intentionally missing deps\n try:\n # Try to import a package that should exist (this shouldn't fail)\n import json\n results['controlled_errors']['json_import'] = {\n 'imported_successfully': True,\n 'has_dumps_method': hasattr(json, 'dumps')\n }\n except ImportError as e:\n results['controlled_errors']['json_import'] = {\n 'imported_successfully': False,\n 'error': str(e)\n }\n \n # Environment checks\n results['environment_checks'] = {\n 'python_version': sys.version,\n 'platform': sys.platform,\n 'executable': sys.executable\n }\n \n return results\n", + "dependencies": [\"nonexistent-package-xyz123\"],\n "args": [],\n "kwargs": {}\n }\n} \ No newline at end of file diff --git a/src/test_function_args.json b/src/test_function_args.json new file mode 100644 index 0000000..ca84a6d --- /dev/null +++ b/src/test_function_args.json @@ -0,0 +1,6 @@ +{ + "input": { + "function_name": "test_function_with_arguments", + "function_code": "def test_function_with_arguments(number, text, data_list=None, multiplier=2):\n import json\n \n # Validate the arguments were passed correctly\n result = {\n 'received_args': {\n 'number': number,\n 'text': text,\n 'data_list': data_list,\n 'multiplier': multiplier\n },\n 'processed_results': {\n 'number_times_multiplier': number * multiplier,\n 'text_upper': text.upper(),\n 'list_sum': sum(data_list) if data_list else 0,\n 'list_length': len(data_list) if data_list else 0\n },\n 'argument_types': {\n 'number_type': str(type(number)),\n 'text_type': str(type(text)),\n 'data_list_type': str(type(data_list)),\n 'multiplier_type': str(type(multiplier))\n }\n }\n \n return result\n", + "args": [\n "gAVLKi4=",\n "gAWVDwAAAAAAAACMC2hlbGxvIHdvcmxklC4="\n ], + "kwargs": {\n "data_list": "gAWVDwAAAAAAAABdlChLAUsCSwNLBEsFZS4=",\n "multiplier": "gAVLAy4="\n }\n }\n} \ No newline at end of file diff --git a/src/test_hf_accelerated_input.json b/src/test_hf_accelerated_input.json new file mode 100644 index 0000000..7665a0e --- /dev/null +++ b/src/test_hf_accelerated_input.json @@ -0,0 +1,11 @@ +{ + "input": { + "function_name": "test_hf_acceleration_with_volume", + "function_code": "def test_hf_acceleration_with_volume():\n import os\n import time\n from transformers import AutoTokenizer\n \n start_time = time.time()\n \n # Test HF model download with acceleration enabled\n model_name = 'gpt2'\n print(f'Testing accelerated HF model download: {model_name}')\n \n tokenizer = AutoTokenizer.from_pretrained(model_name)\n \n download_time = time.time() - start_time\n \n # Check cache paths\n cache_info = {\n 'hf_home': os.environ.get('HF_HOME'),\n 'transformers_cache': os.environ.get('TRANSFORMERS_CACHE'),\n 'virtual_env': os.environ.get('VIRTUAL_ENV'),\n 'download_time': round(download_time, 2)\n }\n \n print(f'Download completed in {download_time:.2f}s')\n print(f'Cache paths: {cache_info}')\n \n return {\n 'model_name': model_name,\n 'vocab_size': tokenizer.vocab_size,\n 'cache_info': cache_info,\n 'acceleration_enabled': True,\n 'test_completed': True\n }\n", + "dependencies": ["transformers", "torch"], + "accelerate_downloads": true, + "hf_models_to_cache": ["gpt2"], + "args": [], + "kwargs": {} + } +} \ No newline at end of file diff --git a/src/test_hf_no_volume.json b/src/test_hf_no_volume.json new file mode 100644 index 0000000..f72818d --- /dev/null +++ b/src/test_hf_no_volume.json @@ -0,0 +1,11 @@ +{ + "input": { + "function_name": "test_hf_acceleration_no_volume", + "function_code": "def test_hf_acceleration_no_volume():\n import os\n import time\n from transformers import AutoTokenizer\n \n # Test that HF acceleration works without a RunPod volume\n # This was the main fix - acceleration should work regardless of volume presence\n \n start_time = time.time()\n \n model_name = 'gpt2'\n print(f'Testing HF acceleration without volume: {model_name}')\n \n tokenizer = AutoTokenizer.from_pretrained(model_name)\n \n download_time = time.time() - start_time\n \n # Verify environment shows no volume but acceleration works\n env_info = {\n 'hf_home': os.environ.get('HF_HOME'),\n 'transformers_cache': os.environ.get('TRANSFORMERS_CACHE'),\n 'virtual_env': os.environ.get('VIRTUAL_ENV'),\n 'has_runpod_volume': '/runpod-volume' in str(os.environ.get('VIRTUAL_ENV', '')),\n 'download_time': round(download_time, 2)\n }\n \n print(f'Download completed in {download_time:.2f}s without volume')\n print(f'Environment: {env_info}')\n \n return {\n 'model_name': model_name,\n 'vocab_size': tokenizer.vocab_size,\n 'environment': env_info,\n 'acceleration_without_volume': True,\n 'test_completed': True\n }\n", + "dependencies": ["transformers", "torch"], + "accelerate_downloads": true, + "hf_models_to_cache": ["gpt2"], + "args": [], + "kwargs": {} + } +} \ No newline at end of file diff --git a/test_input.json b/src/test_input.json similarity index 100% rename from test_input.json rename to src/test_input.json diff --git a/src/test_mixed_dependencies.json b/src/test_mixed_dependencies.json new file mode 100644 index 0000000..9057599 --- /dev/null +++ b/src/test_mixed_dependencies.json @@ -0,0 +1,10 @@ +{ + "input": { + "function_name": "test_mixed_dependencies", + "function_code": "def test_mixed_dependencies():\n import subprocess\n import json\n import os\n \n # Test that both system and Python dependencies are available\n results = {\n 'system_dependencies': {},\n 'python_dependencies': {},\n 'environment_info': {}\n }\n \n # Test system dependency (wget)\n try:\n wget_result = subprocess.run(['wget', '--version'], \n capture_output=True, text=True, timeout=10)\n results['system_dependencies']['wget'] = {\n 'available': wget_result.returncode == 0,\n 'version': wget_result.stdout.split('\\n')[0] if wget_result.returncode == 0 else None,\n 'error': wget_result.stderr if wget_result.returncode != 0 else None\n }\n except Exception as e:\n results['system_dependencies']['wget'] = {\n 'available': False,\n 'error': str(e)\n }\n \n # Test Python dependencies\n try:\n import requests\n results['python_dependencies']['requests'] = {\n 'available': True,\n 'version': requests.__version__,\n 'location': requests.__file__\n }\n except ImportError as e:\n results['python_dependencies']['requests'] = {\n 'available': False,\n 'error': str(e)\n }\n \n try:\n import numpy\n results['python_dependencies']['numpy'] = {\n 'available': True,\n 'version': numpy.__version__,\n 'location': numpy.__file__\n }\n # Test numpy functionality\n arr = numpy.array([1, 2, 3, 4, 5])\n results['python_dependencies']['numpy']['test_result'] = {\n 'array_sum': int(arr.sum()),\n 'array_mean': float(arr.mean())\n }\n except ImportError as e:\n results['python_dependencies']['numpy'] = {\n 'available': False,\n 'error': str(e)\n }\n \n # Environment info\n results['environment_info'] = {\n 'running_as_root': os.getuid() == 0 if hasattr(os, 'getuid') else False,\n 'virtual_env': os.environ.get('VIRTUAL_ENV'),\n 'python_path': os.environ.get('PYTHONPATH')\n }\n \n return results\n", + "dependencies": ["requests", "numpy"], + "system_dependencies": ["wget"], + "args": [], + "kwargs": {} + } +} \ No newline at end of file diff --git a/src/test_system_dependencies.json b/src/test_system_dependencies.json new file mode 100644 index 0000000..12ee909 --- /dev/null +++ b/src/test_system_dependencies.json @@ -0,0 +1,9 @@ +{ + "input": { + "function_name": "test_system_dependencies", + "function_code": "def test_system_dependencies():\n import subprocess\n import os\n \n # Test that system packages were installed successfully\n # We'll test with curl which is commonly available or gets installed\n \n result = {}\n \n # Test if curl command is available\n try:\n curl_result = subprocess.run(['curl', '--version'], \n capture_output=True, text=True, timeout=10)\n if curl_result.returncode == 0:\n result['curl_available'] = True\n result['curl_version'] = curl_result.stdout.split('\\n')[0]\n else:\n result['curl_available'] = False\n result['curl_error'] = curl_result.stderr\n except Exception as e:\n result['curl_available'] = False\n result['curl_error'] = str(e)\n \n # Test if git command is available (should be pre-installed in most containers)\n try:\n git_result = subprocess.run(['git', '--version'],\n capture_output=True, text=True, timeout=10)\n if git_result.returncode == 0:\n result['git_available'] = True\n result['git_version'] = git_result.stdout.strip()\n else:\n result['git_available'] = False\n result['git_error'] = git_result.stderr\n except Exception as e:\n result['git_available'] = False\n result['git_error'] = str(e)\n \n # Check if we're running as root (needed for apt install)\n result['running_as_root'] = os.getuid() == 0 if hasattr(os, 'getuid') else False\n result['environment_check'] = 'system_deps_test_completed'\n \n return result\n", + "system_dependencies": ["curl"], + "args": [], + "kwargs": {} + } +} \ No newline at end of file diff --git a/src/test_uv_no_acceleration.json b/src/test_uv_no_acceleration.json new file mode 100644 index 0000000..a3099e3 --- /dev/null +++ b/src/test_uv_no_acceleration.json @@ -0,0 +1,10 @@ +{ + "input": { + "function_name": "test_uv_installation_without_acceleration", + "function_code": "def test_uv_installation_without_acceleration():\n import json\n import sys\n \n # Test that packages installed with UV (accelerate_downloads=False) are available\n try:\n import requests\n import transformers\n \n # Get package locations to verify they're in the right place\n requests_location = requests.__file__\n transformers_location = transformers.__file__\n \n # Check if we're using the virtual environment\n venv_active = hasattr(sys, 'real_prefix') or (hasattr(sys, 'base_prefix') and sys.base_prefix != sys.prefix)\n \n return {\n 'success': True,\n 'message': 'Both requests and transformers imported successfully with UV (no acceleration)',\n 'requests_location': requests_location,\n 'transformers_location': transformers_location,\n 'virtual_env_active': venv_active,\n 'python_prefix': sys.prefix\n }\n except ImportError as e:\n return {\n 'success': False,\n 'error': f'Failed to import packages: {str(e)}',\n 'python_prefix': sys.prefix,\n 'virtual_env_active': hasattr(sys, 'real_prefix') or (hasattr(sys, 'base_prefix') and sys.base_prefix != sys.prefix)\n }\n", + "dependencies": ["requests", "transformers"], + "accelerate_downloads": false, + "args": [], + "kwargs": {} + } +} \ No newline at end of file diff --git a/src/workspace_manager.py b/src/workspace_manager.py index 38f1982..1276a00 100644 --- a/src/workspace_manager.py +++ b/src/workspace_manager.py @@ -3,7 +3,11 @@ import fcntl import time import logging -from typing import Optional +import asyncio +from typing import Optional, TYPE_CHECKING, Any, Dict + +if TYPE_CHECKING: + from huggingface_accelerator import HuggingFaceAccelerator from remote_execution import FunctionResponse from constants import ( @@ -46,6 +50,9 @@ def __init__(self) -> None: self.cache_path = None self.hf_cache_path = None + # Initialize HuggingFace accelerator after paths are set + self._hf_accelerator: Optional[HuggingFaceAccelerator] = None + if self.has_runpod_volume: self._configure_uv_cache() self._configure_huggingface_cache() @@ -62,19 +69,14 @@ def _configure_huggingface_cache(self): # Ensure HF cache directory exists os.makedirs(self.hf_cache_path, exist_ok=True) - # Set main HF cache directory + # Set main HF cache directory - HF will automatically create subdirectories os.environ["HF_HOME"] = self.hf_cache_path - # Set specific cache paths for different HF components - os.environ["TRANSFORMERS_CACHE"] = os.path.join( - self.hf_cache_path, "transformers" - ) - os.environ["HF_DATASETS_CACHE"] = os.path.join( - self.hf_cache_path, "datasets" - ) - os.environ["HUGGINGFACE_HUB_CACHE"] = os.path.join( - self.hf_cache_path, "hub" - ) + # HF automatically creates and manages these subdirectories: + # - hub/ (for model downloads and cache) + # - transformers/ (legacy, but still used by some components) + # - datasets/ (for HF datasets) + # Let HF handle the hierarchy instead of forcing specific paths def _configure_volume_environment(self): """Configure environment variables for volume usage.""" @@ -371,3 +373,69 @@ def _remove_broken_virtual_environment(self): self.logger.error( f"Error removing broken virtual environment: {str(e)}" ) + + @property + def hf_accelerator(self) -> "HuggingFaceAccelerator": + """Lazy-loaded HuggingFace accelerator.""" + if self._hf_accelerator is None: + from huggingface_accelerator import HuggingFaceAccelerator + + self._hf_accelerator = HuggingFaceAccelerator(self) + return self._hf_accelerator + + def accelerate_model_download( + self, model_id: str, revision: str = "main" + ) -> FunctionResponse: + """ + Pre-download HuggingFace model using acceleration if beneficial. + + Args: + model_id: HuggingFace model identifier + revision: Model revision/branch + + Returns: + FunctionResponse with download result + """ + return self.hf_accelerator.accelerate_model_download(model_id, revision) + + async def accelerate_model_download_async( + self, model_id: str, revision: str = "main" + ) -> FunctionResponse: + """ + Async wrapper for HuggingFace model download acceleration. + + Args: + model_id: HuggingFace model identifier + revision: Model revision/branch + + Returns: + FunctionResponse with download result + """ + return await asyncio.to_thread( + self.accelerate_model_download, model_id, revision + ) + + def is_model_cached(self, model_id: str, revision: str = "main") -> bool: + """ + Check if a HuggingFace model is cached. + + Args: + model_id: HuggingFace model identifier + revision: Model revision/branch + + Returns: + True if model is cached + """ + return self.hf_accelerator.is_model_cached(model_id, revision) + + def get_model_cache_info(self, model_id: str) -> Dict[str, Any]: + """ + Get cache information for a HuggingFace model. + + Args: + model_id: HuggingFace model identifier + + Returns: + Dictionary with cache information + """ + return self.hf_accelerator.get_cache_info(model_id) diff --git a/test_debug_input.json b/test_debug_input.json deleted file mode 100644 index 5c8db78..0000000 --- a/test_debug_input.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "input": { - "function_name": "debug_logging_test", - "function_code": "def debug_logging_test():\n import logging\n logger = logging.getLogger(__name__)\n \n # Test all log levels to verify DEBUG is shown\n logger.debug(\"DEBUG: This should be visible when LOG_LEVEL=DEBUG\")\n logger.info(\"INFO: This should always be visible\")\n logger.warning(\"WARNING: This should always be visible\")\n logger.error(\"ERROR: This should always be visible\")\n \n print(\"Standard output from function execution\")\n \n return {\n \"message\": \"Debug logging test completed\",\n \"current_log_level\": logging.getLogger().level,\n \"level_name\": logging.getLevelName(logging.getLogger().level)\n }\n", - "args": [], - "kwargs": {} - } -} diff --git a/test_hf_input.json b/test_hf_input.json deleted file mode 100644 index 9dd0c92..0000000 --- a/test_hf_input.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "input": { - "function_name": "test_hf_model_download", - "function_code": "def test_hf_model_download():\n import os\n from transformers import AutoTokenizer\n \n # Test downloading a small model\n model_name = 'gpt2'\n tokenizer = AutoTokenizer.from_pretrained(model_name)\n \n # Verify cache environment variables are set\n hf_home = os.environ.get('HF_HOME')\n transformers_cache = os.environ.get('TRANSFORMERS_CACHE')\n \n result = {\n 'model_loaded': True,\n 'vocab_size': tokenizer.vocab_size,\n 'hf_home': hf_home,\n 'transformers_cache': transformers_cache,\n 'cache_configured': hf_home is not None and transformers_cache is not None\n }\n \n return result\n", - "dependencies": ["transformers", "torch"], - "args": [], - "kwargs": {} - } -} diff --git a/test_subprocess_debug.json b/test_subprocess_debug.json deleted file mode 100644 index 4d2a028..0000000 --- a/test_subprocess_debug.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "input": { - "function_code": "import subprocess\nimport os\nimport sys\ndef debug_subprocess_environment():\n \"\"\"Debug subprocess environment to understand vLLM issue.\"\"\"\n results = []\n \n # Check symlink status\n app_venv_path = '/app/.venv'\n if os.path.exists(app_venv_path):\n if os.path.islink(app_venv_path):\n target = os.readlink(app_venv_path)\n results.append(f'✓ Symlink exists: {app_venv_path} -> {target}')\n else:\n results.append(f'✗ {app_venv_path} is not a symlink')\n else:\n results.append(f'✗ {app_venv_path} does not exist')\n \n # Check if target venv has vllm\n try:\n if os.path.islink(app_venv_path):\n target = os.readlink(app_venv_path)\n vllm_path = f'{target}/lib/python*/site-packages/vllm'\n import glob\n vllm_dirs = glob.glob(vllm_path)\n if vllm_dirs:\n results.append(f'✓ vLLM found in target venv: {vllm_dirs[0]}')\n else:\n results.append(f'✗ vLLM not found in target venv (searched: {vllm_path})')\n except Exception as e:\n results.append(f'Error checking vLLM in target: {e}')\n \n # Test subprocess execution with explicit environment\n results.append('')\n results.append('=== Subprocess Tests ===')\n \n # Test 1: Direct python version from symlink\n try:\n result = subprocess.run(\n ['/app/.venv/bin/python3', '--version'],\n capture_output=True, text=True, timeout=10\n )\n if result.returncode == 0:\n results.append(f'✓ Python version from symlink: {result.stdout.strip()}')\n else:\n results.append(f'✗ Python failed: {result.stderr.strip()}')\n except Exception as e:\n results.append(f'✗ Python subprocess error: {e}')\n \n # Test 2: Check if vllm module is accessible\n try:\n result = subprocess.run(\n ['/app/.venv/bin/python3', '-c', 'import vllm; print(\"vLLM import successful\")'],\n capture_output=True, text=True, timeout=10\n )\n if result.returncode == 0:\n results.append(f'✓ vLLM import from subprocess: {result.stdout.strip()}')\n else:\n results.append(f'✗ vLLM import failed: {result.stderr.strip()}')\n except Exception as e:\n results.append(f'✗ vLLM import subprocess error: {e}')\n \n # Test 3: Check Python path in subprocess\n try:\n result = subprocess.run(\n ['/app/.venv/bin/python3', '-c', 'import sys; print(\"PYTHONPATH:\", sys.path[:3])'],\n capture_output=True, text=True, timeout=10\n )\n if result.returncode == 0:\n results.append(f'✓ Subprocess Python path: {result.stdout.strip()}')\n else:\n results.append(f'✗ Python path check failed: {result.stderr.strip()}')\n except Exception as e:\n results.append(f'✗ Python path subprocess error: {e}')\n \n # Test 4: Current process environment\n results.append('')\n results.append('=== Current Process Environment ===')\n results.append(f'VIRTUAL_ENV: {os.environ.get(\"VIRTUAL_ENV\", \"Not set\")}')\n results.append(f'PATH: {os.environ.get(\"PATH\", \"Not set\")[:200]}...')\n results.append(f'Current Python path: {sys.executable}')\n \n return '\\n'.join(results)", - "function_name": "debug_subprocess_environment", - "args": [], - "kwargs": {}, - "dependencies": ["vllm"] - } -} diff --git a/test_vllm_symlink.json b/test_vllm_symlink.json deleted file mode 100644 index 2bd325d..0000000 --- a/test_vllm_symlink.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "input": { - "function_code": "import subprocess\nimport os\ndef test_app_venv_symlink():\n \"\"\"Test that /app/.venv symlink works correctly and demonstrate the fix for vLLM.\"\"\"\n results = []\n \n # Check if we're running with RunPod volume\n has_volume = os.path.exists('/runpod-volume')\n results.append(f'RunPod volume available: {has_volume}')\n \n # Check if /app/.venv exists and is a symlink\n app_venv_path = '/app/.venv'\n if os.path.exists(app_venv_path):\n if os.path.islink(app_venv_path):\n target = os.readlink(app_venv_path)\n results.append(f'SUCCESS: {app_venv_path} is symlink -> {target}')\n else:\n results.append(f'INFO: {app_venv_path} exists but is not a symlink (expected for local testing)')\n else:\n results.append(f'INFO: {app_venv_path} does not exist')\n \n # Test if we can access python from /app/.venv/bin/python3\n try:\n result = subprocess.run(['/app/.venv/bin/python3', '--version'], capture_output=True, text=True, timeout=5)\n if result.returncode == 0:\n results.append(f'SUCCESS: Python accessible from /app/.venv: {result.stdout.strip()}')\n else:\n results.append(f'ERROR: Python failed from /app/.venv: {result.stderr}')\n except subprocess.TimeoutExpired:\n results.append('ERROR: Python command from /app/.venv timed out')\n except Exception as e:\n results.append(f'INFO: Cannot run python from /app/.venv (expected for local): {str(e)}')\n \n # Simulate what vLLM would encounter - explain the fix\n results.append('')\n results.append('=== vLLM Fix Explanation ===')\n if has_volume:\n results.append('With RunPod volume: /app/.venv -> /runpod-volume/runtimes/{endpoint}/.venv')\n results.append('vLLM subprocess calls to /app/.venv/bin/python3 will use volume venv')\n else:\n results.append('Without RunPod volume: /app/.venv is the container default venv')\n results.append('This is the local testing scenario')\n \n return '\\n'.join(results)", - "function_name": "test_app_venv_symlink", - "args": [], - "kwargs": {}, - "dependencies": [] - } -} \ No newline at end of file diff --git a/tests/integration/test_dependency_management.py b/tests/integration/test_dependency_management.py index 16737f3..ad4e1ca 100644 --- a/tests/integration/test_dependency_management.py +++ b/tests/integration/test_dependency_management.py @@ -1,5 +1,5 @@ import pytest -from unittest.mock import patch, MagicMock +from unittest.mock import patch, MagicMock, AsyncMock from remote_executor import RemoteExecutor from remote_execution import FunctionRequest @@ -36,7 +36,6 @@ def test_install_python_dependencies_integration(self): "uv", "pip", "install", - "--no-cache-dir", "requests", "numpy", ] @@ -66,7 +65,7 @@ def test_install_system_dependencies_integration(self): mock_popen.side_effect = [mock_update_process, mock_install_process] result = executor.dependency_installer.install_system_dependencies( - ["curl", "wget"] + ["curl", "wget"], accelerate_downloads=False ) assert result.success is True @@ -113,29 +112,41 @@ def test_with_deps(): with ( patch.object( - executor.dependency_installer, "install_dependencies" + executor.dependency_installer, + "install_dependencies_async", + new_callable=AsyncMock, ) as mock_py_deps, patch.object( - executor.dependency_installer, "install_system_dependencies" + executor.dependency_installer, + "install_system_dependencies_async", + new_callable=AsyncMock, ) as mock_sys_deps, patch.object(executor.function_executor, "execute") as mock_execute, ): # Mock successful dependency installations - mock_sys_deps.return_value = type( - "obj", (object,), {"success": True, "stdout": "system deps installed"} - )() - mock_py_deps.return_value = type( - "obj", (object,), {"success": True, "stdout": "python deps installed"} - )() + from remote_execution import FunctionResponse + + mock_sys_deps.return_value = FunctionResponse( + success=True, stdout="system deps installed" + ) + mock_py_deps.return_value = FunctionResponse( + success=True, stdout="python deps installed" + ) mock_execute.return_value = type( - "obj", (object,), {"success": True, "result": "encoded_result"} + "obj", + (object,), + { + "success": True, + "result": "encoded_result", + "stdout": "function executed", + }, )() result = await executor.ExecuteFunction(request) # Verify all steps were called - mock_sys_deps.assert_called_once_with(["curl"]) - mock_py_deps.assert_called_once_with(["requests"]) + mock_sys_deps.assert_called_once_with(["curl"], True) + mock_py_deps.assert_called_once_with(["requests"], True) mock_execute.assert_called_once_with(request) assert result.success is True @@ -178,7 +189,9 @@ def test_system_dependency_update_failure(self): ) mock_popen.return_value = mock_process - result = executor.dependency_installer.install_system_dependencies(["curl"]) + result = executor.dependency_installer.install_system_dependencies( + ["curl"], accelerate_downloads=False + ) assert result.success is False assert result.error == "Error updating package list" @@ -198,20 +211,20 @@ async def test_dependency_failure_stops_execution(self): with ( patch.object( - executor.dependency_installer, "install_dependencies" + executor.dependency_installer, + "install_dependencies_async", + new_callable=AsyncMock, ) as mock_deps, patch.object(executor.function_executor, "execute") as mock_execute, ): # Mock failed dependency installation - mock_deps.return_value = type( - "obj", - (object,), - { - "success": False, - "error": "Error installing packages", - "stdout": "error details", - }, - )() + from remote_execution import FunctionResponse + + mock_deps.return_value = FunctionResponse( + success=False, + error="Error installing packages", + stdout="error details", + ) result = await executor.ExecuteFunction(request) @@ -220,7 +233,7 @@ async def test_dependency_failure_stops_execution(self): # Verify failure response assert result.success is False - assert result.error == "Error installing packages" + assert "Error installing packages" in result.error @pytest.mark.integration def test_empty_dependency_lists(self): @@ -258,7 +271,6 @@ def test_dependency_command_construction(self): "uv", "pip", "install", - "--no-cache-dir", "package1", "package2>=1.0.0", ] @@ -278,7 +290,9 @@ def test_dependency_command_construction(self): mock_popen.side_effect = [mock_update, mock_install] # Test system dependency command - executor.dependency_installer.install_system_dependencies(["pkg1", "pkg2"]) + executor.dependency_installer.install_system_dependencies( + ["pkg1", "pkg2"], accelerate_downloads=False + ) install_call = mock_popen.call_args_list[1] expected_cmd = [ @@ -311,8 +325,180 @@ def test_exception_handling_in_dependency_installation(self): # Test system dependency exception sys_result = executor.dependency_installer.install_system_dependencies( - ["some-package"] + ["some-package"], accelerate_downloads=False ) assert sys_result.success is False assert "Exception during system package installation" in sys_result.error assert "Subprocess error" in sys_result.error + + @pytest.mark.integration + def test_system_dependency_installation_with_nala_acceleration(self): + """Test system dependency installation with nala acceleration enabled.""" + executor = RemoteExecutor() + + with patch("subprocess.Popen") as mock_popen: + # Mock nala availability check + nala_check = MagicMock() + nala_check.returncode = 0 + nala_check.communicate.return_value = (b"/usr/bin/nala", b"") + + # Mock nala update + nala_update = MagicMock() + nala_update.returncode = 0 + nala_update.communicate.return_value = (b"Reading package lists...", b"") + + # Mock nala install + nala_install = MagicMock() + nala_install.returncode = 0 + nala_install.communicate.return_value = ( + b"Successfully installed build-essential", + b"", + ) + + mock_popen.side_effect = [nala_check, nala_update, nala_install] + + result = executor.dependency_installer.install_system_dependencies( + ["build-essential"], accelerate_downloads=True + ) + + assert result.success is True + assert "Installed with nala acceleration" in result.stdout + + # Verify nala commands were used + calls = mock_popen.call_args_list + assert len(calls) == 3 + assert calls[0][0][0] == ["which", "nala"] # Availability check + assert calls[1][0][0] == ["nala", "update"] # Update + assert calls[2][0][0] == [ + "nala", + "install", + "-y", + "build-essential", + ] # Install + + @pytest.mark.integration + def test_system_dependency_installation_nala_fallback(self): + """Test system dependency installation fallback when nala fails.""" + executor = RemoteExecutor() + + with patch("subprocess.Popen") as mock_popen: + # Mock nala availability check + nala_check = MagicMock() + nala_check.returncode = 0 + nala_check.communicate.return_value = (b"/usr/bin/nala", b"") + + # Mock nala update failure + nala_update = MagicMock() + nala_update.returncode = 1 + nala_update.communicate.return_value = (b"", b"nala update failed") + + # Mock successful apt-get fallback + apt_update = MagicMock() + apt_update.returncode = 0 + apt_update.communicate.return_value = (b"Reading package lists...", b"") + + apt_install = MagicMock() + apt_install.returncode = 0 + apt_install.communicate.return_value = ( + b"Successfully installed python3-dev", + b"", + ) + + mock_popen.side_effect = [nala_check, nala_update, apt_update, apt_install] + + result = executor.dependency_installer.install_system_dependencies( + ["python3-dev"], accelerate_downloads=True + ) + + assert result.success is True + assert "Installed with nala acceleration" not in result.stdout + + # Verify fallback to apt-get was used + calls = mock_popen.call_args_list + assert len(calls) == 4 + assert calls[2][0][0] == ["apt-get", "update"] # apt-get update + assert calls[3][0][0] == [ + "apt-get", + "install", + "-y", + "--no-install-recommends", + "python3-dev", + ] + + @pytest.mark.integration + def test_system_dependency_installation_no_nala_available(self): + """Test system dependency installation when nala is not available.""" + executor = RemoteExecutor() + + with patch("subprocess.Popen") as mock_popen: + # Mock nala not available + nala_check = MagicMock() + nala_check.returncode = 1 + nala_check.communicate.return_value = (b"", b"which: nala: not found") + + # Mock successful apt-get operations + apt_update = MagicMock() + apt_update.returncode = 0 + apt_update.communicate.return_value = (b"Reading package lists...", b"") + + apt_install = MagicMock() + apt_install.returncode = 0 + apt_install.communicate.return_value = (b"Successfully installed gcc", b"") + + mock_popen.side_effect = [nala_check, apt_update, apt_install] + + result = executor.dependency_installer.install_system_dependencies( + ["gcc"], accelerate_downloads=True + ) + + assert result.success is True + assert "Installed with nala acceleration" not in result.stdout + + # Verify standard apt-get was used + calls = mock_popen.call_args_list + assert len(calls) == 3 + assert calls[1][0][0] == ["apt-get", "update"] + assert calls[2][0][0] == [ + "apt-get", + "install", + "-y", + "--no-install-recommends", + "gcc", + ] + + @pytest.mark.integration + def test_system_dependency_installation_with_small_packages(self): + """Test system dependency installation with small packages (no acceleration).""" + executor = RemoteExecutor() + + with patch("subprocess.Popen") as mock_popen: + # Mock apt-get operations (should be used for small packages) + apt_update = MagicMock() + apt_update.returncode = 0 + apt_update.communicate.return_value = (b"Reading package lists...", b"") + + apt_install = MagicMock() + apt_install.returncode = 0 + apt_install.communicate.return_value = (b"Successfully installed nano", b"") + + mock_popen.side_effect = [apt_update, apt_install] + + result = executor.dependency_installer.install_system_dependencies( + ["nano", "vim"], accelerate_downloads=True + ) + + assert result.success is True + assert "Installed with nala acceleration" not in result.stdout + + # Should use apt-get because these are not large packages + calls = mock_popen.call_args_list + assert len(calls) == 2 + assert calls[0][0][0] == ["apt-get", "update"] + assert calls[1][0][0] == [ + "apt-get", + "install", + "-y", + "--no-install-recommends", + "nano", + "vim", + ] diff --git a/tests/integration/test_download_acceleration_integration.py b/tests/integration/test_download_acceleration_integration.py new file mode 100644 index 0000000..1dcea96 --- /dev/null +++ b/tests/integration/test_download_acceleration_integration.py @@ -0,0 +1,364 @@ +""" +Integration tests for download acceleration functionality using hf_transfer. +""" + +import pytest +import tempfile +import shutil +from pathlib import Path +from unittest.mock import Mock, patch, AsyncMock + +from src.download_accelerator import ( + DownloadAccelerator, + HfTransferDownloader, +) +from src.huggingface_accelerator import HuggingFaceAccelerator +from src.dependency_installer import DependencyInstaller +from src.workspace_manager import WorkspaceManager +from src.remote_executor import RemoteExecutor +from src.remote_execution import FunctionRequest + + +class TestDownloadAccelerationIntegration: + """Integration tests for download acceleration components.""" + + def setup_method(self): + """Set up test environment.""" + self.temp_dir = Path(tempfile.mkdtemp()) + self.mock_workspace_manager = Mock(spec=WorkspaceManager) + self.mock_workspace_manager.has_runpod_volume = True + self.mock_workspace_manager.hf_cache_path = str(self.temp_dir / ".hf-cache") + self.mock_workspace_manager.workspace_path = str(self.temp_dir) + self.mock_workspace_manager.venv_path = str(self.temp_dir / ".venv") + + def teardown_method(self): + """Clean up test environment.""" + shutil.rmtree(self.temp_dir, ignore_errors=True) + + @patch("src.download_accelerator.HF_TRANSFER_ENABLED", True) + def test_hf_transfer_availability_detection(self): + """Test detection of hf_transfer availability.""" + with patch("importlib.util.find_spec") as mock_find_spec: + # Test when hf_transfer is available + mock_find_spec.return_value = Mock() # Not None means available + downloader = HfTransferDownloader() + assert downloader.hf_transfer_available is True + + # Test when hf_transfer is not available + mock_find_spec.return_value = None # None means not available + downloader = HfTransferDownloader() + assert downloader.hf_transfer_available is False + + def test_download_accelerator_decision_logic(self): + """Test when acceleration should be used.""" + accelerator = DownloadAccelerator(self.mock_workspace_manager) + + # Mock hf_transfer as available + accelerator.hf_transfer_downloader.hf_transfer_available = True + + # Should accelerate large HuggingFace files + assert ( + accelerator.should_accelerate_download( + "https://huggingface.co/model/resolve/main/large.bin", 50.0 + ) + is True + ) + + # Should accelerate HuggingFace URLs regardless of size + assert ( + accelerator.should_accelerate_download( + "https://huggingface.co/model/resolve/main/file", 5.0 + ) + is True + ) + + # Should not accelerate non-HF files + assert ( + accelerator.should_accelerate_download("http://example.com/large.bin", 50.0) + is False + ) + assert ( + accelerator.should_accelerate_download("http://example.com/small.txt", 1.0) + is False + ) + + @patch("src.huggingface_accelerator.HfApi.repo_info") + def test_hf_model_file_fetching(self, mock_repo_info): + """Test fetching HuggingFace model file information.""" + # Mock successful API response using HF Hub's native API + from unittest.mock import Mock + + mock_repo_info_obj = Mock() + mock_repo_info_obj.siblings = [ + Mock(rfilename="pytorch_model.bin", size=500 * 1024 * 1024), # 500MB + Mock(rfilename="config.json", size=1024), # 1KB + ] + mock_repo_info.return_value = mock_repo_info_obj + + accelerator = HuggingFaceAccelerator(self.mock_workspace_manager) + files = accelerator.get_model_files("gpt2") + + assert len(files) == 2 + assert files[0]["path"] == "pytorch_model.bin" + assert files[0]["size"] == 500 * 1024 * 1024 + assert "huggingface.co/gpt2/resolve/main/pytorch_model.bin" in files[0]["url"] + + def test_hf_model_acceleration_decision(self): + """Test when HuggingFace models should be pre-cached.""" + accelerator = HuggingFaceAccelerator(self.mock_workspace_manager) + + # Should pre-cache known large models (HF handles acceleration automatically) + assert accelerator.should_accelerate_model("gpt2") is True + assert accelerator.should_accelerate_model("bert-base-uncased") is True + assert accelerator.should_accelerate_model("microsoft/DialoGPT-medium") is True + assert accelerator.should_accelerate_model("stable-diffusion-v1-5") is True + + # Should not pre-cache unknown/small models + assert accelerator.should_accelerate_model("unknown/tiny-model") is False + + @patch("src.workspace_manager.WorkspaceManager.__init__") + def test_remote_executor_with_acceleration(self, mock_workspace_init): + """Test RemoteExecutor integration with download acceleration.""" + # Mock workspace manager + mock_workspace_init.return_value = None + + executor = RemoteExecutor() + executor.workspace_manager = self.mock_workspace_manager + executor.workspace_manager.has_runpod_volume = True + executor.workspace_manager.initialize_workspace = Mock( + return_value=Mock(success=True) + ) + executor.workspace_manager.accelerate_model_download = Mock( + return_value=Mock(success=True, stdout="Model cached successfully") + ) + + # Mock dependency installer + executor.dependency_installer = Mock() + executor.dependency_installer.install_system_dependencies = Mock( + return_value=Mock(success=True, stdout="System deps installed") + ) + executor.dependency_installer.install_dependencies_async = AsyncMock( + return_value=Mock(success=True, stdout="Python deps installed") + ) + executor.workspace_manager.accelerate_model_download_async = AsyncMock( + return_value=Mock(success=True, stdout="Model cached") + ) + executor.dependency_installer._identify_large_packages = Mock( + return_value=["torch", "transformers"] + ) + executor.dependency_installer.download_accelerator = Mock() + executor.dependency_installer.download_accelerator.hf_transfer_downloader = ( + Mock() + ) + executor.dependency_installer.download_accelerator.hf_transfer_downloader.hf_transfer_available = True + + # Mock executors + executor.function_executor = Mock() + executor.function_executor.execute = Mock( + return_value=Mock(success=True, result="Function executed") + ) + + # Create request with acceleration enabled + request = FunctionRequest( + function_name="test_function", + function_code="def test_function(): return 'test'", + dependencies=["torch", "transformers"], + accelerate_downloads=True, + hf_models_to_cache=["gpt2", "bert-base-uncased"], + ) + + # Execute function + import asyncio + + asyncio.run(executor.ExecuteFunction(request)) + + # Verify model caching was attempted (async method is called) + assert ( + executor.workspace_manager.accelerate_model_download_async.call_count == 2 + ) + executor.workspace_manager.accelerate_model_download_async.assert_any_call( + "gpt2" + ) + executor.workspace_manager.accelerate_model_download_async.assert_any_call( + "bert-base-uncased" + ) + + # Verify dependencies were installed with acceleration enabled (async method) + executor.dependency_installer.install_dependencies_async.assert_called_once_with( + ["torch", "transformers"], True + ) + + @patch.dict("os.environ", {"HF_TOKEN": "test_token"}) + def test_hf_token_authentication(self): + """Test that HF_TOKEN is properly used for authentication.""" + downloader = HfTransferDownloader() + # Test that downloader correctly checks for availability + # Since hf_transfer may not be installed, this will be False + # and that's expected behavior + assert isinstance(downloader.hf_transfer_available, bool) + + def test_strategy_selection_logic(self): + """Test the download strategy selection logic.""" + accelerator = DownloadAccelerator(self.mock_workspace_manager) + accelerator.hf_transfer_downloader.hf_transfer_available = True + + # Test file caching detection + non_existent_file = str(self.temp_dir / "non_existent.bin") + existing_file = str(self.temp_dir / "existing.bin") + + # Create existing file + Path(existing_file).write_bytes(b"existing data") + + assert accelerator.is_file_cached(non_existent_file) is False + assert accelerator.is_file_cached(existing_file) is True + + def test_fallback_behavior_without_accelerators(self): + """Test graceful fallback when accelerators are not available.""" + accelerator = DownloadAccelerator(self.mock_workspace_manager) + accelerator.hf_transfer_downloader.hf_transfer_available = False + + # With new logic, when acceleration is not available, we defer to HF native handling + result = accelerator.download_with_fallback( + "https://huggingface.co/gpt2/resolve/main/file.bin", + str(self.temp_dir / "file.bin"), + ) + + # Should return failure and defer to HF native handling + assert result.success is False + assert "defer to HF native handling" in result.error + + @patch("src.dependency_installer.subprocess.Popen") + def test_dependency_installation_without_acceleration(self, mock_popen): + """Test that packages install normally without aria2c acceleration.""" + # Mock successful installation + mock_process = Mock() + mock_process.returncode = 0 + mock_process.communicate.return_value = (b"Installed successfully", b"") + mock_popen.return_value = mock_process + + installer = DependencyInstaller(self.mock_workspace_manager) + + # Install packages + packages = ["torch==2.0.0", "transformers>=4.20.0"] + result = installer.install_dependencies(packages) + + assert result.success is True + + # Verify the installation was called + mock_popen.assert_called_once() + args, _ = mock_popen.call_args + assert set(packages).issubset(args[0]) + + @patch("src.hf_downloader_tetra.DownloadAccelerator") + def test_model_cache_management(self, mock_download_accelerator): + """Test model cache information and management using tetra strategy.""" + accelerator = HuggingFaceAccelerator(self.mock_workspace_manager) + + # Test cache info for non-existent model + cache_info = accelerator.get_cache_info("non-existent-model") + assert cache_info["cached"] is False + assert cache_info["cache_size_mb"] == 0 + assert cache_info["file_count"] == 0 + + # Create mock cache files for existing model + model_cache_dir = self.temp_dir / ".hf-cache" / "transformers" / "gpt2" + model_cache_dir.mkdir(parents=True, exist_ok=True) + + # Create mock model files + config_file = model_cache_dir / "config.json" + model_file = model_cache_dir / "pytorch_model.bin" + + config_file.write_text('{"model_type": "gpt2"}') # ~25 bytes + model_file.write_bytes(b"0" * (150 * 1024 * 1024)) # 150MB of zeros + + # Test cache info for cached model + cache_info = accelerator.get_cache_info("gpt2") + assert cache_info["cached"] is True + assert ( + abs(cache_info["cache_size_mb"] - 150.0) < 0.1 + ) # Allow for small differences + assert cache_info["file_count"] == 2 + + # Test cache clearing + result = accelerator.clear_model_cache("gpt2") + assert result.success is True + assert not model_cache_dir.exists() + + +class TestDownloadAccelerationErrorHandling: + """Test error handling and edge cases in download acceleration.""" + + def setup_method(self): + """Set up test environment.""" + self.temp_dir = Path(tempfile.mkdtemp()) + + def teardown_method(self): + """Clean up test environment.""" + shutil.rmtree(self.temp_dir, ignore_errors=True) + + def test_hf_transfer_download_failure_fallback(self): + """Test fallback to standard download when hf_transfer fails.""" + downloader = HfTransferDownloader() + + # Test that unavailable downloader raises error + if not downloader.hf_transfer_available: + try: + result = downloader.download( + "https://huggingface.co/gpt2/resolve/main/file.bin", + str(self.temp_dir / "file.bin"), + ) + assert not result.success + except RuntimeError as e: + assert "hf_transfer not available" in str(e) + + @patch("src.huggingface_accelerator.HfApi.repo_info") + def test_hf_api_failure_handling(self, mock_repo_info): + """Test handling of HuggingFace API failures.""" + # Mock API failure + mock_repo_info.side_effect = Exception("API error") + + accelerator = HuggingFaceAccelerator(None) + files = accelerator.get_model_files("gpt2") + + # Should return empty list on failure + assert files == [] + + def test_invalid_model_acceleration(self): + """Test acceleration with invalid model specifications.""" + mock_workspace = Mock() + mock_workspace.has_runpod_volume = True + mock_workspace.hf_cache_path = str(self.temp_dir) + + accelerator = HuggingFaceAccelerator(mock_workspace) + + # Test with empty model ID - should return success but indicate no pre-caching needed + result = accelerator.accelerate_model_download("") + assert result.success is True + assert result.stdout is not None + assert "does not require acceleration" in result.stdout + + def test_non_hf_url_handling(self): + """Test handling of non-HuggingFace URLs.""" + downloader = HfTransferDownloader() + + # Test error handling for non-HF URLs when downloader is available + if downloader.hf_transfer_available: + result = downloader.download( + "http://example.com/file.bin", str(self.temp_dir / "file.bin") + ) + assert result.success is False + assert result.error_message is not None + assert "only supports HuggingFace URLs" in result.error_message + else: + # When not available, should raise RuntimeError + try: + result = downloader.download( + "http://example.com/file.bin", str(self.temp_dir / "file.bin") + ) + assert not result.success + except RuntimeError as e: + assert "hf_transfer not available" in str(e) + + +if __name__ == "__main__": + pytest.main([__file__]) diff --git a/tests/integration/test_handler_integration.py b/tests/integration/test_handler_integration.py index 592bce7..f12bc4b 100644 --- a/tests/integration/test_handler_integration.py +++ b/tests/integration/test_handler_integration.py @@ -13,7 +13,7 @@ class TestHandlerIntegration: def setup_method(self): """Setup for each test method.""" - self.test_data_dir = Path(__file__).parent.parent.parent + self.test_data_dir = Path(__file__).parent.parent.parent / "src" self.test_input_file = self.test_data_dir / "test_input.json" self.test_class_input_file = self.test_data_dir / "test_class_input.json" diff --git a/tests/integration/test_hf_strategy_integration.py b/tests/integration/test_hf_strategy_integration.py new file mode 100644 index 0000000..dd07bcf --- /dev/null +++ b/tests/integration/test_hf_strategy_integration.py @@ -0,0 +1,162 @@ +""" +Integration tests for HuggingFace download strategy system. +""" + +import os +import pytest +from unittest.mock import Mock, patch + +from src.huggingface_accelerator import HuggingFaceAccelerator +from src.hf_strategy_factory import HFStrategyFactory +from hf_downloader_tetra import TetraHFDownloader +from hf_downloader_native import NativeHFDownloader + + +@pytest.fixture +def mock_workspace_manager(): + """Mock workspace manager for integration tests.""" + workspace_manager = Mock() + workspace_manager.hf_cache_path = "/tmp/test_cache" + return workspace_manager + + +class TestHuggingFaceAcceleratorIntegration: + """Integration tests for HuggingFaceAccelerator with strategy pattern.""" + + def test_accelerator_uses_configured_strategy(self, mock_workspace_manager): + """Test that accelerator uses the configured strategy.""" + # Set environment to use tetra strategy + os.environ[HFStrategyFactory.STRATEGY_ENV_VAR] = "tetra" + + with patch("src.hf_downloader_tetra.DownloadAccelerator"): + accelerator = HuggingFaceAccelerator(mock_workspace_manager) + assert isinstance(accelerator.strategy, TetraHFDownloader) + + def test_accelerator_strategy_delegation(self, mock_workspace_manager): + """Test that accelerator properly delegates to strategy methods.""" + # Set to native strategy for simpler testing + os.environ[HFStrategyFactory.STRATEGY_ENV_VAR] = "native" + + accelerator = HuggingFaceAccelerator(mock_workspace_manager) + + # Mock the strategy methods + accelerator.strategy.should_accelerate = Mock(return_value=True) + accelerator.strategy.download_model = Mock(return_value=Mock(success=True)) + accelerator.strategy.is_model_cached = Mock(return_value=False) + accelerator.strategy.get_cache_info = Mock(return_value={"cached": False}) + accelerator.strategy.clear_model_cache = Mock(return_value=Mock(success=True)) + + # Test delegation + assert accelerator.should_accelerate_model("gpt2") + accelerator.strategy.should_accelerate.assert_called_once_with("gpt2") + + accelerator.accelerate_model_download("gpt2", "main") + accelerator.strategy.download_model.assert_called_once_with("gpt2", "main") + + assert not accelerator.is_model_cached("gpt2", "main") + accelerator.strategy.is_model_cached.assert_called_once_with("gpt2", "main") + + cache_info = accelerator.get_cache_info("gpt2") + assert cache_info == {"cached": False} + accelerator.strategy.get_cache_info.assert_called_once_with("gpt2") + + accelerator.clear_model_cache("gpt2") + accelerator.strategy.clear_model_cache.assert_called_once_with("gpt2") + + def test_accelerator_strategy_switching(self, mock_workspace_manager): + """Test runtime strategy switching.""" + # Start with native strategy + os.environ[HFStrategyFactory.STRATEGY_ENV_VAR] = "native" + + accelerator = HuggingFaceAccelerator(mock_workspace_manager) + assert isinstance(accelerator.strategy, NativeHFDownloader) + + # Switch to tetra strategy + with patch("src.hf_downloader_tetra.DownloadAccelerator"): + accelerator.set_strategy("tetra") + assert isinstance(accelerator.strategy, TetraHFDownloader) + + # Check environment was updated + assert os.environ[HFStrategyFactory.STRATEGY_ENV_VAR] == "tetra" + + def test_accelerator_get_strategy_info(self, mock_workspace_manager): + """Test getting strategy information from accelerator.""" + os.environ[HFStrategyFactory.STRATEGY_ENV_VAR] = "native" + + accelerator = HuggingFaceAccelerator(mock_workspace_manager) + info = accelerator.get_strategy_info() + + assert info["current_strategy"] == "native" + assert info["strategy_instance"] == "NativeHFDownloader" + assert info["environment_variable"] == HFStrategyFactory.STRATEGY_ENV_VAR + + +class TestStrategyEnvironmentIntegration: + """Test environment variable integration across the system.""" + + def test_strategy_persistence_across_instances(self, mock_workspace_manager): + """Test that strategy setting persists across new instances.""" + # Set strategy + HFStrategyFactory.set_strategy("tetra") + + # Create first instance + with patch("src.hf_downloader_tetra.DownloadAccelerator"): + accelerator1 = HuggingFaceAccelerator(mock_workspace_manager) + assert isinstance(accelerator1.strategy, TetraHFDownloader) + + # Create second instance - should use same strategy + with patch("src.hf_downloader_tetra.DownloadAccelerator"): + accelerator2 = HuggingFaceAccelerator(mock_workspace_manager) + assert isinstance(accelerator2.strategy, TetraHFDownloader) + + def test_invalid_strategy_fallback(self, mock_workspace_manager): + """Test fallback behavior with invalid strategy.""" + # Set invalid strategy + os.environ[HFStrategyFactory.STRATEGY_ENV_VAR] = "invalid_strategy" + + with patch("src.hf_downloader_tetra.DownloadAccelerator"): + accelerator = HuggingFaceAccelerator(mock_workspace_manager) + # Should fallback to tetra (default) + assert isinstance(accelerator.strategy, TetraHFDownloader) + + def test_no_env_var_uses_default(self, mock_workspace_manager): + """Test default strategy when no environment variable is set.""" + # Clear environment variable + if HFStrategyFactory.STRATEGY_ENV_VAR in os.environ: + del os.environ[HFStrategyFactory.STRATEGY_ENV_VAR] + + with patch("src.hf_downloader_tetra.DownloadAccelerator"): + accelerator = HuggingFaceAccelerator(mock_workspace_manager) + # Should use default (tetra) + assert isinstance(accelerator.strategy, TetraHFDownloader) + + +class TestWorkspaceManagerIntegration: + """Test integration with workspace manager.""" + + def test_strategy_uses_workspace_cache_path(self): + """Test that strategies use workspace manager's cache path.""" + import tempfile + + with tempfile.TemporaryDirectory() as temp_dir: + workspace_manager = Mock() + workspace_manager.hf_cache_path = temp_dir + + # Test tetra strategy + with patch("src.hf_downloader_tetra.DownloadAccelerator"): + tetra_strategy = TetraHFDownloader(workspace_manager) + assert str(tetra_strategy.cache_dir) == temp_dir + + # Test native strategy (doesn't use cache_dir directly but should store workspace_manager) + native_strategy = NativeHFDownloader(workspace_manager) + assert native_strategy.workspace_manager == workspace_manager + + def test_strategy_with_no_cache_path(self): + """Test strategy behavior when workspace manager has no cache path.""" + workspace_manager = Mock() + workspace_manager.hf_cache_path = None + + with patch("src.hf_downloader_tetra.DownloadAccelerator"): + tetra_strategy = TetraHFDownloader(workspace_manager) + # Should fall back to default cache location + assert "huggingface" in str(tetra_strategy.cache_dir) diff --git a/tests/integration/test_runpod_volume_integration.py b/tests/integration/test_runpod_volume_integration.py index 6a81843..64ae524 100644 --- a/tests/integration/test_runpod_volume_integration.py +++ b/tests/integration/test_runpod_volume_integration.py @@ -4,16 +4,31 @@ import base64 import cloudpickle import threading -from unittest.mock import Mock, patch +from unittest.mock import Mock, patch, MagicMock -from handler import RemoteExecutor, handler -from remote_execution import FunctionResponse -from constants import RUNPOD_VOLUME_PATH, VENV_DIR_NAME, RUNTIMES_DIR_NAME +from src.handler import RemoteExecutor, handler +from src.remote_execution import FunctionResponse +from src.constants import RUNPOD_VOLUME_PATH, VENV_DIR_NAME, RUNTIMES_DIR_NAME class TestFullWorkflowWithVolume: """Test complete request workflows with volume integration.""" + def setup_method(self): + # Patch subprocess.run globally for all tests in this class + class ContextManagerMock(MagicMock): + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + pass + + self.subprocess_run_patcher = patch("subprocess.run", new=ContextManagerMock()) + self.subprocess_run_patcher.start() + + def teardown_method(self): + self.subprocess_run_patcher.stop() + @patch("os.makedirs") @patch("workspace_manager.WorkspaceManager._validate_virtual_environment") @patch("os.path.exists") @@ -80,8 +95,15 @@ def numpy_test(): # Should have installed dependencies assert mock_popen.called - install_command = mock_popen.call_args[0][0] - assert "numpy==1.21.0" in " ".join(install_command) + # Check that a uv pip install command was made with numpy + popen_calls = [call[0][0] for call in mock_popen.call_args_list] + install_calls = [ + call + for call in popen_calls + if "uv" in call and "pip" in call and "install" in call + ] + assert len(install_calls) > 0 + assert any("numpy==1.21.0" in " ".join(call) for call in install_calls) @patch("os.makedirs") @patch("workspace_manager.WorkspaceManager._validate_virtual_environment") @@ -142,10 +164,21 @@ async def test_workflow_with_system_dependencies( b"", ) + # Mock subprocess calls in order: + # 1. which nala (system package acceleration check) + # 2. apt-get update + # 3. apt-get install + # 4. uv pip list (get installed packages) + # 5. uv pip install + nala_check_process = Mock() + nala_check_process.returncode = 1 # nala not available + nala_check_process.communicate.return_value = (b"", b"which: nala: not found") + mock_popen.side_effect = [ + nala_check_process, apt_update_process, apt_install_process, - pip_list_process, # Added missing call + pip_list_process, pip_install_process, ] @@ -161,12 +194,12 @@ async def test_workflow_with_system_dependencies( "function_code": """ def system_test(): import subprocess - result = subprocess.run(['which', 'curl'], capture_output=True, text=True) + result = subprocess.run(['which', 'wget'], capture_output=True, text=True) return result.stdout.strip() """, "args": [], "kwargs": {}, - "system_dependencies": ["curl"], + "system_dependencies": ["wget"], "dependencies": ["requests==2.25.1"], } } @@ -177,17 +210,35 @@ def system_test(): assert result["success"] is True # Should have called apt-get update and install - calls = [call[0][0] for call in mock_popen.call_args_list] - assert any("apt-get" in " ".join(call) and "update" in call for call in calls) - assert any("apt-get" in " ".join(call) and "curl" in call for call in calls) - assert any( - "uv" in call and "requests==2.25.1" in " ".join(call) for call in calls - ) + popen_calls = [call[0][0] for call in mock_popen.call_args_list] + assert any( + "apt-get" in " ".join(call) and "wget" in " ".join(call) + for call in popen_calls + ) + assert any( + "uv" in " ".join(call) and "requests==2.25.1" in " ".join(call) + for call in popen_calls + ) class TestConcurrentRequests: """Test realistic concurrent access scenarios.""" + def setup_method(self): + # Patch subprocess.run globally for all tests in this class + class ContextManagerMock(MagicMock): + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + pass + + self.subprocess_run_patcher = patch("subprocess.run", new=ContextManagerMock()) + self.subprocess_run_patcher.start() + + def teardown_method(self): + self.subprocess_run_patcher.stop() + @patch("os.makedirs") @patch("workspace_manager.WorkspaceManager._validate_virtual_environment") @patch("os.path.exists") @@ -331,6 +382,21 @@ def install_deps(executor, packages): class TestMixedExecution: """Test mixed volume and non-volume execution scenarios.""" + def setup_method(self): + # Patch subprocess.run globally for all tests in this class + class ContextManagerMock(MagicMock): + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + pass + + self.subprocess_run_patcher = patch("subprocess.run", new=ContextManagerMock()) + self.subprocess_run_patcher.start() + + def teardown_method(self): + self.subprocess_run_patcher.stop() + @patch("os.makedirs") @patch("workspace_manager.WorkspaceManager._validate_virtual_environment") @patch("os.path.exists") @@ -395,11 +461,10 @@ async def test_fallback_on_volume_initialization_failure( ) # Volume exists but venv doesn't exist # Mock file operations - mock_file = Mock() + mock_file = MagicMock() mock_file.fileno.return_value = 3 mock_open.return_value.__enter__.return_value = mock_file - # Mock failed virtual environment creation mock_process = Mock() mock_process.returncode = 1 mock_process.communicate.return_value = (b"", b"Failed to create venv") @@ -426,6 +491,21 @@ async def test_fallback_on_volume_initialization_failure( class TestErrorHandlingIntegration: """Test error handling in integrated volume scenarios.""" + def setup_method(self): + # Patch subprocess.run globally for all tests in this class + class ContextManagerMock(MagicMock): + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + pass + + self.subprocess_run_patcher = patch("subprocess.run", new=ContextManagerMock()) + self.subprocess_run_patcher.start() + + def teardown_method(self): + self.subprocess_run_patcher.stop() + @patch("os.makedirs") @patch("workspace_manager.WorkspaceManager._validate_virtual_environment") @patch("os.path.exists") diff --git a/tests/unit/test_dependency_installer.py b/tests/unit/test_dependency_installer.py index d3760c2..6911f64 100644 --- a/tests/unit/test_dependency_installer.py +++ b/tests/unit/test_dependency_installer.py @@ -30,7 +30,9 @@ def test_install_system_dependencies_success(self, mock_popen): mock_popen.side_effect = [update_process, install_process] - result = self.installer.install_system_dependencies(["curl", "wget"]) + result = self.installer.install_system_dependencies( + ["curl", "wget"], accelerate_downloads=False + ) assert result.success is True assert "Installed packages" in result.stdout @@ -45,7 +47,9 @@ def test_install_system_dependencies_update_failure(self, mock_popen): mock_popen.return_value = update_process - result = self.installer.install_system_dependencies(["curl"]) + result = self.installer.install_system_dependencies( + ["curl"], accelerate_downloads=False + ) assert result.success is False assert "Error updating package list" in result.error @@ -103,6 +107,75 @@ def test_install_dependencies_empty_list(self): assert result.success is True assert "No packages to install" in result.stdout + @patch("subprocess.Popen") + @patch("importlib.invalidate_caches") + def test_install_dependencies_with_acceleration_enabled( + self, mock_invalidate, mock_popen + ): + """Test Python dependency installation with acceleration enabled (uses UV).""" + process = Mock() + process.returncode = 0 + process.communicate.return_value = (b"Successfully installed with UV", b"") + mock_popen.return_value = process + + result = self.installer.install_dependencies( + ["requests", "numpy"], accelerate_downloads=True + ) + + assert result.success is True + assert "Successfully installed with UV" in result.stdout + # Verify UV was used + mock_popen.assert_called_once() + args = mock_popen.call_args[0][0] + assert args[0] == "uv" + assert args[1] == "pip" + assert args[2] == "install" + mock_invalidate.assert_called_once() + + @patch("subprocess.Popen") + @patch("importlib.invalidate_caches") + def test_install_dependencies_with_acceleration_disabled( + self, mock_invalidate, mock_popen + ): + """Test Python dependency installation with acceleration disabled (uses UV).""" + process = Mock() + process.returncode = 0 + process.communicate.return_value = (b"Successfully installed with UV", b"") + mock_popen.return_value = process + + result = self.installer.install_dependencies( + ["requests", "numpy"], accelerate_downloads=False + ) + + assert result.success is True + assert "Successfully installed with UV" in result.stdout + # Verify UV was used + mock_popen.assert_called_once() + args = mock_popen.call_args[0][0] + assert args[0] == "uv" + assert args[1] == "pip" + assert args[2] == "install" + mock_invalidate.assert_called_once() + + @patch("subprocess.Popen") + def test_install_dependencies_uv_failure(self, mock_popen): + """Test Python dependency installation failure using UV.""" + process = Mock() + process.returncode = 1 + process.communicate.return_value = (b"", b"Package not found") + mock_popen.return_value = process + + result = self.installer.install_dependencies( + ["nonexistent-package"], accelerate_downloads=False + ) + + assert result.success is False + assert "Error installing packages" in result.error + # Verify UV was used + args = mock_popen.call_args[0][0] + assert args[0] == "uv" + assert args[1] == "pip" + class TestDifferentialInstallation: """Test differential package installation with volume.""" @@ -171,3 +244,212 @@ def test_skip_already_installed_packages(self, mock_popen, mock_exists): assert result.success is True assert "All packages already installed" in result.stdout + + +class TestSystemPackageAcceleration: + """Test system package acceleration with nala.""" + + def setup_method(self): + """Setup for each test method.""" + self.workspace_manager = Mock(spec=WorkspaceManager) + self.installer = DependencyInstaller(self.workspace_manager) + + @patch("subprocess.Popen") + def test_nala_availability_check_available(self, mock_popen): + """Test nala availability detection when nala is available.""" + process = Mock() + process.returncode = 0 + process.communicate.return_value = (b"/usr/bin/nala", b"") + mock_popen.return_value = process + + # First call should check availability + assert self.installer._check_nala_available() is True + + # Second call should use cached result + assert self.installer._check_nala_available() is True + + # Should only call subprocess once due to caching + assert mock_popen.call_count == 1 + + @patch("subprocess.Popen") + def test_nala_availability_check_unavailable(self, mock_popen): + """Test nala availability detection when nala is not available.""" + process = Mock() + process.returncode = 1 + process.communicate.return_value = (b"", b"which: nala: not found") + mock_popen.return_value = process + + assert self.installer._check_nala_available() is False + + @patch("subprocess.Popen") + def test_nala_availability_check_exception(self, mock_popen): + """Test nala availability detection when subprocess raises exception.""" + mock_popen.side_effect = Exception("Command failed") + + assert self.installer._check_nala_available() is False + + def test_identify_large_system_packages(self): + """Test identification of large system packages.""" + packages = ["build-essential", "curl", "python3-dev", "nano", "gcc"] + large_packages = self.installer._identify_large_system_packages(packages) + + expected = ["build-essential", "curl", "python3-dev", "gcc"] + assert set(large_packages) == set(expected) + + def test_identify_large_system_packages_empty(self): + """Test identification when no large packages are present.""" + packages = ["nano", "vim", "htop"] + large_packages = self.installer._identify_large_system_packages(packages) + + assert large_packages == [] + + @patch("subprocess.Popen") + def test_install_system_with_nala_success(self, mock_popen): + """Test successful system package installation with nala.""" + # Mock nala update + update_process = Mock() + update_process.returncode = 0 + update_process.communicate.return_value = (b"Updated with nala", b"") + + # Mock nala install + install_process = Mock() + install_process.returncode = 0 + install_process.communicate.return_value = (b"Installed with nala", b"") + + mock_popen.side_effect = [update_process, install_process] + + result = self.installer._install_system_with_nala(["build-essential"]) + + assert result.success is True + assert "Installed with nala acceleration" in result.stdout + assert mock_popen.call_count == 2 + + @patch("subprocess.Popen") + def test_install_system_with_nala_update_failure_fallback(self, mock_popen): + """Test nala installation fallback when update fails.""" + # Mock failed nala update + update_process = Mock() + update_process.returncode = 1 + update_process.communicate.return_value = (b"", b"Update failed") + + # Mock successful apt-get operations for fallback + apt_update_process = Mock() + apt_update_process.returncode = 0 + apt_update_process.communicate.return_value = (b"Updated", b"") + + apt_install_process = Mock() + apt_install_process.returncode = 0 + apt_install_process.communicate.return_value = (b"Installed", b"") + + mock_popen.side_effect = [ + update_process, + apt_update_process, + apt_install_process, + ] + + result = self.installer._install_system_with_nala(["build-essential"]) + + assert result.success is True + assert "Installed with nala acceleration" not in result.stdout + + @patch("subprocess.Popen") + def test_install_system_with_nala_install_failure_fallback(self, mock_popen): + """Test nala installation fallback when install fails.""" + # Mock successful nala update + update_process = Mock() + update_process.returncode = 0 + update_process.communicate.return_value = (b"Updated", b"") + + # Mock failed nala install + install_process = Mock() + install_process.returncode = 1 + install_process.communicate.return_value = (b"", b"Install failed") + + # Mock successful apt-get operations for fallback + apt_update_process = Mock() + apt_update_process.returncode = 0 + apt_update_process.communicate.return_value = (b"Updated", b"") + + apt_install_process = Mock() + apt_install_process.returncode = 0 + apt_install_process.communicate.return_value = (b"Installed", b"") + + mock_popen.side_effect = [ + update_process, + install_process, + apt_update_process, + apt_install_process, + ] + + result = self.installer._install_system_with_nala(["build-essential"]) + + assert result.success is True + assert "Installed with nala acceleration" not in result.stdout + + @patch("subprocess.Popen") + def test_install_system_dependencies_with_acceleration(self, mock_popen): + """Test system dependency installation with acceleration enabled.""" + # Mock nala availability check + nala_check = Mock() + nala_check.returncode = 0 + nala_check.communicate.return_value = (b"/usr/bin/nala", b"") + + # Mock nala operations + nala_update = Mock() + nala_update.returncode = 0 + nala_update.communicate.return_value = (b"Updated", b"") + + nala_install = Mock() + nala_install.returncode = 0 + nala_install.communicate.return_value = (b"Installed with nala", b"") + + mock_popen.side_effect = [nala_check, nala_update, nala_install] + + result = self.installer.install_system_dependencies( + ["build-essential", "python3-dev"], accelerate_downloads=True + ) + + assert result.success is True + assert "Installed with nala acceleration" in result.stdout + + @patch("subprocess.Popen") + def test_install_system_dependencies_without_acceleration(self, mock_popen): + """Test system dependency installation with acceleration disabled.""" + # Mock apt-get operations + apt_update = Mock() + apt_update.returncode = 0 + apt_update.communicate.return_value = (b"Updated", b"") + + apt_install = Mock() + apt_install.returncode = 0 + apt_install.communicate.return_value = (b"Installed", b"") + + mock_popen.side_effect = [apt_update, apt_install] + + result = self.installer.install_system_dependencies( + ["build-essential"], accelerate_downloads=False + ) + + assert result.success is True + assert "Installed with nala acceleration" not in result.stdout + + @patch("subprocess.Popen") + def test_install_system_dependencies_no_large_packages(self, mock_popen): + """Test system dependency installation when no large packages are present.""" + # Mock apt-get operations (should fallback to standard) + apt_update = Mock() + apt_update.returncode = 0 + apt_update.communicate.return_value = (b"Updated", b"") + + apt_install = Mock() + apt_install.returncode = 0 + apt_install.communicate.return_value = (b"Installed", b"") + + mock_popen.side_effect = [apt_update, apt_install] + + result = self.installer.install_system_dependencies( + ["nano", "vim"], accelerate_downloads=True + ) + + assert result.success is True + assert "Installed with nala acceleration" not in result.stdout diff --git a/tests/unit/test_hf_download_strategies.py b/tests/unit/test_hf_download_strategies.py new file mode 100644 index 0000000..898ab17 --- /dev/null +++ b/tests/unit/test_hf_download_strategies.py @@ -0,0 +1,260 @@ +""" +Unit tests for HuggingFace download strategies. +""" + +import os +import pytest +from unittest.mock import Mock, patch + +from src.hf_downloader_tetra import TetraHFDownloader +from src.hf_downloader_native import NativeHFDownloader +from src.hf_strategy_factory import HFStrategyFactory +from src.remote_execution import FunctionResponse + + +@pytest.fixture +def mock_workspace_manager(): + """Mock workspace manager.""" + workspace_manager = Mock() + workspace_manager.hf_cache_path = "/tmp/test_cache" + return workspace_manager + + +@pytest.fixture +def mock_download_accelerator(): + """Mock download accelerator.""" + accelerator = Mock() + accelerator.hf_transfer_downloader = Mock() + accelerator.hf_transfer_downloader.hf_transfer_available = True + return accelerator + + +class TestHFStrategyFactory: + """Tests for HF strategy factory.""" + + def test_get_available_strategies(self): + """Test getting available strategies.""" + strategies = HFStrategyFactory.get_available_strategies() + assert HFStrategyFactory.TETRA_STRATEGY in strategies + assert HFStrategyFactory.NATIVE_STRATEGY in strategies + + def test_get_configured_strategy_default(self): + """Test default strategy when no env var set.""" + # Clear environment variable + if HFStrategyFactory.STRATEGY_ENV_VAR in os.environ: + del os.environ[HFStrategyFactory.STRATEGY_ENV_VAR] + + strategy = HFStrategyFactory.get_configured_strategy() + assert strategy == HFStrategyFactory.DEFAULT_STRATEGY + + def test_get_configured_strategy_from_env(self): + """Test getting strategy from environment variable.""" + os.environ[HFStrategyFactory.STRATEGY_ENV_VAR] = "tetra" + strategy = HFStrategyFactory.get_configured_strategy() + assert strategy == "tetra" + + def test_get_configured_strategy_invalid_fallback(self): + """Test fallback to default for invalid strategy.""" + os.environ[HFStrategyFactory.STRATEGY_ENV_VAR] = "invalid_strategy" + strategy = HFStrategyFactory.get_configured_strategy() + assert strategy == HFStrategyFactory.DEFAULT_STRATEGY + + def test_create_tetra_strategy(self, mock_workspace_manager): + """Test creating tetra strategy.""" + with patch("src.hf_strategy_factory.TetraHFDownloader") as mock_tetra: + mock_instance = Mock() + mock_tetra.return_value = mock_instance + + strategy = HFStrategyFactory.create_strategy( + mock_workspace_manager, HFStrategyFactory.TETRA_STRATEGY + ) + + mock_tetra.assert_called_once_with(mock_workspace_manager) + assert strategy == mock_instance + + def test_create_native_strategy(self, mock_workspace_manager): + """Test creating native strategy.""" + with patch("src.hf_strategy_factory.NativeHFDownloader") as mock_native: + mock_instance = Mock() + mock_native.return_value = mock_instance + + strategy = HFStrategyFactory.create_strategy( + mock_workspace_manager, HFStrategyFactory.NATIVE_STRATEGY + ) + + mock_native.assert_called_once_with(mock_workspace_manager) + assert strategy == mock_instance + + def test_set_strategy(self): + """Test setting strategy environment variable.""" + HFStrategyFactory.set_strategy("tetra") + assert os.environ[HFStrategyFactory.STRATEGY_ENV_VAR] == "tetra" + + def test_set_strategy_invalid(self): + """Test setting invalid strategy raises error.""" + with pytest.raises(ValueError): + HFStrategyFactory.set_strategy("invalid_strategy") + + def test_get_strategy_info(self): + """Test getting strategy information.""" + os.environ[HFStrategyFactory.STRATEGY_ENV_VAR] = "tetra" + + info = HFStrategyFactory.get_strategy_info() + + assert info["current_strategy"] == "tetra" + assert info["environment_variable"] == HFStrategyFactory.STRATEGY_ENV_VAR + assert info["environment_value"] == "tetra" + assert info["default_strategy"] == HFStrategyFactory.DEFAULT_STRATEGY + assert "tetra" in info["available_strategies"] + assert "native" in info["available_strategies"] + + +class TestTetraHFDownloader: + """Tests for Tetra HF downloader strategy.""" + + def test_init(self, mock_workspace_manager): + """Test TetraHFDownloader initialization.""" + with patch( + "src.hf_downloader_tetra.DownloadAccelerator" + ) as mock_accelerator_class: + downloader = TetraHFDownloader(mock_workspace_manager) + + assert downloader.workspace_manager == mock_workspace_manager + mock_accelerator_class.assert_called_once_with(mock_workspace_manager) + + def test_should_accelerate_with_hf_transfer(self, mock_workspace_manager): + """Test should_accelerate when hf_transfer is available.""" + with patch( + "src.hf_downloader_tetra.DownloadAccelerator" + ) as mock_accelerator_class: + mock_accelerator = Mock() + mock_accelerator.hf_transfer_downloader.hf_transfer_available = True + mock_accelerator_class.return_value = mock_accelerator + + downloader = TetraHFDownloader(mock_workspace_manager) + + # Should accelerate large models + assert downloader.should_accelerate("gpt-3.5-turbo") + assert downloader.should_accelerate("llama") + + # Should not accelerate small models + assert not downloader.should_accelerate("prajjwal1/bert-tiny") + + def test_should_accelerate_without_hf_transfer(self, mock_workspace_manager): + """Test should_accelerate when hf_transfer is not available.""" + with patch( + "src.hf_downloader_tetra.DownloadAccelerator" + ) as mock_accelerator_class: + mock_accelerator = Mock() + mock_accelerator.hf_transfer_downloader.hf_transfer_available = False + mock_accelerator_class.return_value = mock_accelerator + + downloader = TetraHFDownloader(mock_workspace_manager) + + # Should not accelerate any models without hf_transfer + assert not downloader.should_accelerate("gpt-3.5-turbo") + assert not downloader.should_accelerate("llama") + + @patch("src.hf_downloader_tetra.Path.mkdir") + def test_download_model_success(self, mock_mkdir, mock_workspace_manager): + """Test successful model download.""" + with patch( + "src.hf_downloader_tetra.DownloadAccelerator" + ) as mock_accelerator_class: + mock_accelerator = Mock() + mock_accelerator.hf_transfer_downloader.hf_transfer_available = True + mock_accelerator_class.return_value = mock_accelerator + + downloader = TetraHFDownloader(mock_workspace_manager) + + # Mock get_model_files to return test files + downloader.get_model_files = Mock( + return_value=[ + { + "path": "pytorch_model.bin", + "size": 100 * 1024 * 1024, + "url": "https://test.com/file", + } + ] + ) + + # Mock download_with_fallback to succeed + mock_accelerator.download_with_fallback.return_value = FunctionResponse( + success=True + ) + + result = downloader.download_model("gpt2") + + assert result.success + assert "Successfully pre-downloaded" in result.stdout + + def test_download_model_no_acceleration_needed(self, mock_workspace_manager): + """Test download when no acceleration is needed.""" + with patch( + "src.hf_downloader_tetra.DownloadAccelerator" + ) as mock_accelerator_class: + mock_accelerator = Mock() + mock_accelerator.hf_transfer_downloader.hf_transfer_available = False + mock_accelerator_class.return_value = mock_accelerator + + downloader = TetraHFDownloader(mock_workspace_manager) + + result = downloader.download_model("prajjwal1/bert-tiny") + + assert result.success + assert "does not require acceleration" in result.stdout + + +class TestNativeHFDownloader: + """Tests for Native HF downloader strategy.""" + + def test_init(self, mock_workspace_manager): + """Test NativeHFDownloader initialization.""" + downloader = NativeHFDownloader(mock_workspace_manager) + assert downloader.workspace_manager == mock_workspace_manager + + def test_should_accelerate(self, mock_workspace_manager): + """Test should_accelerate logic.""" + downloader = NativeHFDownloader(mock_workspace_manager) + + # Should accelerate large models + assert downloader.should_accelerate("gpt-3.5-turbo") + assert downloader.should_accelerate("llama") + + # Should not accelerate small models + assert not downloader.should_accelerate("prajjwal1/bert-tiny") + + @patch("src.hf_downloader_native.snapshot_download") + def test_download_model_success( + self, mock_snapshot_download, mock_workspace_manager + ): + """Test successful model download.""" + mock_snapshot_download.return_value = "/cache/models/gpt2" + + downloader = NativeHFDownloader(mock_workspace_manager) + result = downloader.download_model("gpt2") + + assert result.success + assert "Successfully pre-cached model gpt2" in result.stdout + mock_snapshot_download.assert_called_once_with(repo_id="gpt2", revision="main") + + @patch("src.hf_downloader_native.snapshot_download") + def test_download_model_failure( + self, mock_snapshot_download, mock_workspace_manager + ): + """Test failed model download.""" + mock_snapshot_download.side_effect = Exception("Download failed") + + downloader = NativeHFDownloader(mock_workspace_manager) + result = downloader.download_model("gpt2") + + assert not result.success + assert "Failed to pre-cache model gpt2" in result.error + + def test_download_model_no_acceleration_needed(self, mock_workspace_manager): + """Test download when no acceleration is needed.""" + downloader = NativeHFDownloader(mock_workspace_manager) + result = downloader.download_model("prajjwal1/bert-tiny") + + assert result.success + assert "does not require pre-caching" in result.stdout diff --git a/tests/unit/test_remote_executor.py b/tests/unit/test_remote_executor.py index 98e4fcd..928adcb 100644 --- a/tests/unit/test_remote_executor.py +++ b/tests/unit/test_remote_executor.py @@ -1,7 +1,7 @@ import pytest import base64 import cloudpickle -from unittest.mock import Mock, patch +from unittest.mock import Mock, patch, AsyncMock from remote_executor import RemoteExecutor from remote_execution import FunctionRequest @@ -109,11 +109,15 @@ async def test_execute_function_with_dependencies_orchestration(self): self.executor.workspace_manager, "initialize_workspace" ) as mock_init: with patch.object( - self.executor.dependency_installer, "install_system_dependencies" - ) as mock_sys_deps: + self.executor.dependency_installer, + "install_system_dependencies_async", + new_callable=AsyncMock, + ) as mock_sys_deps_async: with patch.object( - self.executor.dependency_installer, "install_dependencies" - ) as mock_py_deps: + self.executor.dependency_installer, + "install_dependencies_async", + new_callable=AsyncMock, + ) as mock_py_deps_async: with patch.object( self.executor.function_executor, "execute" ) as mock_execute: @@ -121,10 +125,14 @@ async def test_execute_function_with_dependencies_orchestration(self): mock_init.return_value = Mock( success=True, stdout="Workspace ready" ) - mock_sys_deps.return_value = Mock( + + # Mock async methods with proper FunctionResponse returns + from remote_execution import FunctionResponse + + mock_sys_deps_async.return_value = FunctionResponse( success=True, stdout="System deps installed" ) - mock_py_deps.return_value = Mock( + mock_py_deps_async.return_value = FunctionResponse( success=True, stdout="Python deps installed" ) mock_execute.return_value = Mock( @@ -134,8 +142,8 @@ async def test_execute_function_with_dependencies_orchestration(self): await self.executor.ExecuteFunction(request) # Verify all components were called in correct order - mock_sys_deps.assert_called_once_with(["curl"]) - mock_py_deps.assert_called_once_with(["requests"]) + mock_sys_deps_async.assert_called_once_with(["curl"], True) + mock_py_deps_async.assert_called_once_with(["requests"], True) mock_execute.assert_called_once_with(request) @pytest.mark.asyncio @@ -184,8 +192,10 @@ async def test_execute_function_dependency_failure_stops_execution(self): self.executor.workspace_manager, "initialize_workspace" ) as mock_init: with patch.object( - self.executor.dependency_installer, "install_dependencies" - ) as mock_py_deps: + self.executor.dependency_installer, + "install_dependencies_async", + new_callable=AsyncMock, + ) as mock_py_deps_async: with patch.object( self.executor.function_executor, "execute" ) as mock_execute: @@ -193,7 +203,11 @@ async def test_execute_function_dependency_failure_stops_execution(self): mock_init.return_value = Mock( success=True, stdout="Workspace ready" ) - mock_py_deps.return_value = Mock( + + # Mock async method with FunctionResponse + from remote_execution import FunctionResponse + + mock_py_deps_async.return_value = FunctionResponse( success=False, error="Package not found" ) @@ -211,8 +225,8 @@ def test_component_access_methods(self): self.executor.dependency_installer, "install_dependencies" ) as mock_install: mock_install.return_value = Mock(success=True) - self.executor.dependency_installer.install_dependencies(["test"]) - mock_install.assert_called_once_with(["test"]) + self.executor.dependency_installer.install_dependencies(["test"], True) + mock_install.assert_called_once_with(["test"], True) # Test workspace manager methods with patch.object( diff --git a/tests/unit/test_workspace_manager.py b/tests/unit/test_workspace_manager.py index 69dd8bb..701ba70 100644 --- a/tests/unit/test_workspace_manager.py +++ b/tests/unit/test_workspace_manager.py @@ -218,22 +218,14 @@ def test_configure_volume_environment(self, mock_exists, mock_makedirs): os.environ.get("UV_CACHE_DIR") == f"{RUNPOD_VOLUME_PATH}/{UV_CACHE_DIR_NAME}" ) - # HF cache is shared at volume root + # HF cache is shared at volume root - HF manages subdirectories automatically assert ( os.environ.get("HF_HOME") == f"{RUNPOD_VOLUME_PATH}/{HF_CACHE_DIR_NAME}" ) - assert ( - os.environ.get("TRANSFORMERS_CACHE") - == f"{RUNPOD_VOLUME_PATH}/{HF_CACHE_DIR_NAME}/transformers" - ) - assert ( - os.environ.get("HF_DATASETS_CACHE") - == f"{RUNPOD_VOLUME_PATH}/{HF_CACHE_DIR_NAME}/datasets" - ) - assert ( - os.environ.get("HUGGINGFACE_HUB_CACHE") - == f"{RUNPOD_VOLUME_PATH}/{HF_CACHE_DIR_NAME}/hub" - ) + # HF automatically creates and manages subdirectories, no need to set specific paths + assert "TRANSFORMERS_CACHE" not in os.environ + assert "HF_DATASETS_CACHE" not in os.environ + assert "HUGGINGFACE_HUB_CACHE" not in os.environ # Virtual environment is endpoint-specific expected_venv = ( f"{RUNPOD_VOLUME_PATH}/{RUNTIMES_DIR_NAME}/default/{VENV_DIR_NAME}" diff --git a/tetra-rp b/tetra-rp index 4bc6a8c..5322042 160000 --- a/tetra-rp +++ b/tetra-rp @@ -1 +1 @@ -Subproject commit 4bc6a8cfdd141b3ae00521f326d917098b9c2c3b +Subproject commit 5322042111dab88eb093c27d6a9e894e7b0f605b diff --git a/uv.lock b/uv.lock index 19edc18..c46d141 100644 --- a/uv.lock +++ b/uv.lock @@ -846,6 +846,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ee/45/b82e3c16be2182bff01179db177fe144d58b5dc787a7d4492c6ed8b9317f/frozenlist-1.7.0-py3-none-any.whl", hash = "sha256:9a5af342e34f7e97caf8c995864c7a396418ae2859cc6fdf1b1073020d516a7e", size = 13106 }, ] +[[package]] +name = "fsspec" +version = "2025.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8b/02/0835e6ab9cfc03916fe3f78c0956cfcdb6ff2669ffa6651065d5ebf7fc98/fsspec-2025.7.0.tar.gz", hash = "sha256:786120687ffa54b8283d942929540d8bc5ccfa820deb555a2b5d0ed2b737bf58", size = 304432 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2f/e0/014d5d9d7a4564cf1c40b5039bc882db69fd881111e03ab3657ac0b218e2/fsspec-2025.7.0-py3-none-any.whl", hash = "sha256:8b012e39f63c7d5f10474de957f3ab793b47b45ae7d39f2fb735f8bbe25c0e21", size = 199597 }, +] + [[package]] name = "h11" version = "0.16.0" @@ -855,6 +864,42 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515 }, ] +[[package]] +name = "hf-transfer" +version = "0.1.9" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1a/eb/8fc64f40388c29ce8ce3b2b180a089d4d6b25b1d0d232d016704cb852104/hf_transfer-0.1.9.tar.gz", hash = "sha256:035572865dab29d17e783fbf1e84cf1cb24f3fcf8f1b17db1cfc7fdf139f02bf", size = 25201 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/f5/461d2e5f307e5048289b1168d5c642ae3bb2504e88dff1a38b92ed990a21/hf_transfer-0.1.9-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:e66acf91df4a8b72f60223059df3003062a5ae111757187ed1a06750a30e911b", size = 1393046 }, + { url = "https://files.pythonhosted.org/packages/41/ba/8d9fd9f1083525edfcb389c93738c802f3559cb749324090d7109c8bf4c2/hf_transfer-0.1.9-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:8669dbcc7a3e2e8d61d42cd24da9c50d57770bd74b445c65123291ca842a7e7a", size = 1348126 }, + { url = "https://files.pythonhosted.org/packages/8e/a2/cd7885bc9959421065a6fae0fe67b6c55becdeda4e69b873e52976f9a9f0/hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8fd0167c4407a3bc4cdd0307e65ada2294ec04f1813d8a69a5243e379b22e9d8", size = 3728604 }, + { url = "https://files.pythonhosted.org/packages/f6/2e/a072cf196edfeda3310c9a5ade0a0fdd785e6154b3ce24fc738c818da2a7/hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ee8b10afedcb75f71091bcc197c526a6ebf5c58bbbadb34fdeee6160f55f619f", size = 3064995 }, + { url = "https://files.pythonhosted.org/packages/c2/84/aec9ef4c0fab93c1ea2b1badff38c78b4b2f86f0555b26d2051dbc920cde/hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5828057e313de59300dd1abb489444bc452efe3f479d3c55b31a8f680936ba42", size = 3580908 }, + { url = "https://files.pythonhosted.org/packages/29/63/b560d39651a56603d64f1a0212d0472a44cbd965db2fa62b99d99cb981bf/hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fc6bd19e1cc177c66bdef15ef8636ad3bde79d5a4f608c158021153b4573509d", size = 3400839 }, + { url = "https://files.pythonhosted.org/packages/d6/d8/f87ea6f42456254b48915970ed98e993110521e9263472840174d32c880d/hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cdca9bfb89e6f8f281890cc61a8aff2d3cecaff7e1a4d275574d96ca70098557", size = 3552664 }, + { url = "https://files.pythonhosted.org/packages/d6/56/1267c39b65fc8f4e2113b36297320f102718bf5799b544a6cbe22013aa1d/hf_transfer-0.1.9-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:89a23f58b7b7effbc047b8ca286f131b17728c99a9f972723323003ffd1bb916", size = 4073732 }, + { url = "https://files.pythonhosted.org/packages/82/1a/9c748befbe3decf7cb415e34f8a0c3789a0a9c55910dea73d581e48c0ce5/hf_transfer-0.1.9-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:dc7fff1345980d6c0ebb92c811d24afa4b98b3e07ed070c8e38cc91fd80478c5", size = 3390096 }, + { url = "https://files.pythonhosted.org/packages/72/85/4c03da147b6b4b7cb12e074d3d44eee28604a387ed0eaf7eaaead5069c57/hf_transfer-0.1.9-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:1a6bd16c667ebe89a069ca163060127a794fa3a3525292c900b8c8cc47985b0d", size = 3664743 }, + { url = "https://files.pythonhosted.org/packages/e7/6e/e597b04f753f1b09e6893075d53a82a30c13855cbaa791402695b01e369f/hf_transfer-0.1.9-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:d2fde99d502093ade3ab1b53f80da18480e9902aa960dab7f74fb1b9e5bc5746", size = 3695243 }, + { url = "https://files.pythonhosted.org/packages/09/89/d4e234727a26b2546c8fb70a276cd924260d60135f2165bf8b9ed67bb9a4/hf_transfer-0.1.9-cp38-abi3-win32.whl", hash = "sha256:435cc3cdc8524ce57b074032b8fd76eed70a4224d2091232fa6a8cef8fd6803e", size = 1086605 }, + { url = "https://files.pythonhosted.org/packages/a1/14/f1e15b851d1c2af5b0b1a82bf8eb10bda2da62d98180220ba6fd8879bb5b/hf_transfer-0.1.9-cp38-abi3-win_amd64.whl", hash = "sha256:16f208fc678911c37e11aa7b586bc66a37d02e636208f18b6bc53d29b5df40ad", size = 1160240 }, +] + +[[package]] +name = "hf-xet" +version = "1.1.8" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7a/49/91010b59debc7c862a5fd426d343134dd9a68778dbe570234b6495a4e204/hf_xet-1.1.8.tar.gz", hash = "sha256:62a0043e441753bbc446dcb5a3fe40a4d03f5fb9f13589ef1df9ab19252beb53", size = 484065 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9c/91/5814db3a0d4a65fb6a87f0931ae28073b87f06307701fe66e7c41513bfb4/hf_xet-1.1.8-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:3d5f82e533fc51c7daad0f9b655d9c7811b5308e5890236828bd1dd3ed8fea74", size = 2752357 }, + { url = "https://files.pythonhosted.org/packages/70/72/ce898516e97341a7a9d450609e130e108643389110261eaee6deb1ba8545/hf_xet-1.1.8-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:8e2dba5896bca3ab61d0bef4f01a1647004de59640701b37e37eaa57087bbd9d", size = 2613142 }, + { url = "https://files.pythonhosted.org/packages/b7/d6/13af5f916cef795ac2b5e4cc1de31f2e0e375f4475d50799915835f301c2/hf_xet-1.1.8-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfe5700bc729be3d33d4e9a9b5cc17a951bf8c7ada7ba0c9198a6ab2053b7453", size = 3175859 }, + { url = "https://files.pythonhosted.org/packages/4c/ed/34a193c9d1d72b7c3901b3b5153b1be9b2736b832692e1c3f167af537102/hf_xet-1.1.8-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:09e86514c3c4284ed8a57d6b0f3d089f9836a0af0a1ceb3c9dd664f1f3eaefef", size = 3074178 }, + { url = "https://files.pythonhosted.org/packages/4a/1b/de6817b4bf65385280252dff5c9cceeedfbcb27ddb93923639323c1034a4/hf_xet-1.1.8-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:4a9b99ab721d385b83f4fc8ee4e0366b0b59dce03b5888a86029cc0ca634efbf", size = 3238122 }, + { url = "https://files.pythonhosted.org/packages/b7/13/874c85c7ed519ec101deb654f06703d9e5e68d34416730f64c4755ada36a/hf_xet-1.1.8-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:25b9d43333bbef39aeae1616789ec329c21401a7fe30969d538791076227b591", size = 3344325 }, + { url = "https://files.pythonhosted.org/packages/9e/d3/0aaf279f4f3dea58e99401b92c31c0f752924ba0e6c7d7bb07b1dbd7f35e/hf_xet-1.1.8-cp37-abi3-win_amd64.whl", hash = "sha256:4171f31d87b13da4af1ed86c98cf763292e4720c088b4957cf9d564f92904ca9", size = 2801689 }, +] + [[package]] name = "httpcore" version = "1.0.9" @@ -919,6 +964,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517 }, ] +[[package]] +name = "huggingface-hub" +version = "0.34.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "filelock" }, + { name = "fsspec" }, + { name = "hf-xet", marker = "platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" }, + { name = "packaging" }, + { name = "pyyaml" }, + { name = "requests" }, + { name = "tqdm" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/45/c9/bdbe19339f76d12985bc03572f330a01a93c04dffecaaea3061bdd7fb892/huggingface_hub-0.34.4.tar.gz", hash = "sha256:a4228daa6fb001be3f4f4bdaf9a0db00e1739235702848df00885c9b5742c85c", size = 459768 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/39/7b/bb06b061991107cd8783f300adff3e7b7f284e330fd82f507f2a1417b11d/huggingface_hub-0.34.4-py3-none-any.whl", hash = "sha256:9b365d781739c93ff90c359844221beef048403f1bc1f1c123c191257c3c890a", size = 561452 }, +] + [[package]] name = "idna" version = "3.10" @@ -2120,6 +2184,45 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/76/42/3efaf858001d2c2913de7f354563e3a3a2f0decae3efe98427125a8f441e/typer-0.16.0-py3-none-any.whl", hash = "sha256:1f79bed11d4d02d4310e3c1b7ba594183bcedb0ac73b27a9e5f28f6fb5b98855", size = 46317 }, ] +[[package]] +name = "types-requests" +version = "2.31.0.6" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.10'", +] +dependencies = [ + { name = "types-urllib3", marker = "python_full_version < '3.10'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f9/b8/c1e8d39996b4929b918aba10dba5de07a8b3f4c8487bb61bb79882544e69/types-requests-2.31.0.6.tar.gz", hash = "sha256:cd74ce3b53c461f1228a9b783929ac73a666658f223e28ed29753771477b3bd0", size = 15535 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5c/a1/6f8dc74d9069e790d604ddae70cb46dcbac668f1bb08136e7b0f2f5cd3bf/types_requests-2.31.0.6-py3-none-any.whl", hash = "sha256:a2db9cb228a81da8348b49ad6db3f5519452dd20a9c1e1a868c83c5fe88fd1a9", size = 14516 }, +] + +[[package]] +name = "types-requests" +version = "2.32.4.20250809" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.10'", +] +dependencies = [ + { name = "urllib3", version = "2.5.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ed/b0/9355adb86ec84d057fea765e4c49cce592aaf3d5117ce5609a95a7fc3dac/types_requests-2.32.4.20250809.tar.gz", hash = "sha256:d8060de1c8ee599311f56ff58010fb4902f462a1470802cf9f6ed27bc46c4df3", size = 23027 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2b/6f/ec0012be842b1d888d46884ac5558fd62aeae1f0ec4f7a581433d890d4b5/types_requests-2.32.4.20250809-py3-none-any.whl", hash = "sha256:f73d1832fb519ece02c85b1f09d5f0dd3108938e7d47e7f94bbfa18a6782b163", size = 20644 }, +] + +[[package]] +name = "types-urllib3" +version = "1.26.25.14" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/73/de/b9d7a68ad39092368fb21dd6194b362b98a1daeea5dcfef5e1adb5031c7e/types-urllib3-1.26.25.14.tar.gz", hash = "sha256:229b7f577c951b8c1b92c1bc2b2fdb0b49847bd2af6d1cc2a2e3dd340f3bda8f", size = 11239 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/11/7b/3fc711b2efea5e85a7a0bbfe269ea944aa767bbba5ec52f9ee45d362ccf3/types_urllib3-1.26.25.14-py3-none-any.whl", hash = "sha256:9683bbb7fb72e32bfe9d2be6e04875fbe1b3eeec3cbb4ea231435aa7fd6b4f0e", size = 15377 }, +] + [[package]] name = "typing-extensions" version = "4.14.1" @@ -2470,7 +2573,10 @@ version = "0.4.1" source = { virtual = "." } dependencies = [ { name = "cloudpickle" }, + { name = "hf-transfer" }, + { name = "huggingface-hub" }, { name = "pydantic" }, + { name = "requests" }, { name = "runpod" }, ] @@ -2482,12 +2588,17 @@ dev = [ { name = "pytest-cov" }, { name = "pytest-mock" }, { name = "ruff" }, + { name = "types-requests", version = "2.31.0.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "types-requests", version = "2.32.4.20250809", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, ] [package.metadata] requires-dist = [ { name = "cloudpickle", specifier = ">=3.1.1" }, + { name = "hf-transfer", specifier = ">=0.1.0" }, + { name = "huggingface-hub", specifier = ">=0.32.0" }, { name = "pydantic", specifier = ">=2.11.4" }, + { name = "requests", specifier = ">=2.25.0" }, { name = "runpod" }, ] @@ -2499,6 +2610,7 @@ dev = [ { name = "pytest-cov", specifier = ">=6.0.0" }, { name = "pytest-mock", specifier = ">=3.14.0" }, { name = "ruff", specifier = ">=0.8.0" }, + { name = "types-requests", specifier = ">=2.25.0" }, ] [[package]]