Merged

30 commits
09ab3e1  feat: add download acceleration infrastructure (deanq, Aug 16, 2025)
795c9e5  feat: integrate download acceleration with dependency installer (deanq, Aug 16, 2025)
046eb58  feat: add workspace acceleration support (deanq, Aug 16, 2025)
45a65fe  test: add download acceleration test coverage (deanq, Aug 16, 2025)
ce51390  chore: moved test-handler files to src/ (deanq, Aug 16, 2025)
6c04de1  feat: runtime uses aria2 for accelerated parallel downloads (deanq, Aug 16, 2025)
66eb286  chore: update project structure and dependencies (deanq, Aug 16, 2025)
1930b4b  chore: updated tetra-rp (deanq, Aug 19, 2025)
731fd56  build: local-execution-test use make test-handler (deanq, Aug 19, 2025)
e829140  chore: update CLAUDE.md (deanq, Aug 19, 2025)
104b2da  chore: move these values to constants.py for maintainability (deanq, Aug 19, 2025)
f8aa89a  feat: add system package acceleration with nala (deanq, Aug 19, 2025)
cd56185  refactor: disable Python package download acceleration (deanq, Aug 20, 2025)
d7c996d  test: uv is no longer part of download accelerator (deanq, Aug 20, 2025)
2ab93e3  feat: implement accelerate_downloads parameter logic in RemoteExecutor (deanq, Aug 21, 2025)
b50a7bf  feat: add pip fallback for Python dependencies when acceleration disa… (deanq, Aug 21, 2025)
440d00d  feat: enhance HF model caching with hf_transfer/hf_xet strategy (deanq, Aug 21, 2025)
0320e4d  test: add comprehensive coverage for accelerate_downloads parameter (deanq, Aug 21, 2025)
034f770  test: update integration tests for new acceleration parameter (deanq, Aug 21, 2025)
9531079  chore: update dependencies and constants for download acceleration (deanq, Aug 21, 2025)
d75d320  refactor: remove pip installation method from dependency installer (deanq, Aug 21, 2025)
227b33e  test: update unit tests to expect UV instead of pip (deanq, Aug 21, 2025)
338a165  test: rename test file from pip to UV naming convention (deanq, Aug 21, 2025)
f88745d  feat: implement parallel execution for accelerated downloads (deanq, Aug 21, 2025)
f22e74d  feat: add async wrapper for HuggingFace model download acceleration (deanq, Aug 21, 2025)
816fc75  test: update tests for parallel execution and async dependencies (deanq, Aug 21, 2025)
c9ad0d3  test: comprehensive test coverage expansion and cleanup (deanq, Aug 21, 2025)
e31137a  refactor: optimize HF acceleration to use native Hub features (deanq, Aug 21, 2025)
e1db417  chore: memory correction (deanq, Aug 21, 2025)
76ab9c0  feat: implement HuggingFace download acceleration strategies (deanq, Aug 21, 2025)
17 changes: 1 addition & 16 deletions .github/workflows/ci.yml
@@ -99,22 +99,7 @@ jobs:
run: make setup

- name: Test local handler execution
run: |
echo "Testing handler with all test_*.json files..."
passed=0
total=0
for test_file in test_*.json; do
total=$((total + 1))
echo "Testing with $test_file..."
if timeout 30s env PYTHONPATH=src RUNPOD_TEST_INPUT="$(cat "$test_file")" uv run python src/handler.py >/dev/null 2>&1; then
echo "✓ $test_file: PASSED"
passed=$((passed + 1))
else
echo "✗ $test_file: FAILED"
exit 1
fi
done
echo "All $passed/$total handler tests passed!"
run: make test-handler

release:
runs-on: ubuntu-latest
26 changes: 15 additions & 11 deletions CLAUDE.md
@@ -68,12 +68,8 @@ make build-cpu # Build CPU-only Docker image

### Local Testing
```bash
# Test handler locally with test_input.json
PYTHONPATH=src RUNPOD_TEST_INPUT="$(cat test_input.json)" uv run python src/handler.py

# Test with other test files
PYTHONPATH=src RUNPOD_TEST_INPUT="$(cat test_class_input.json)" uv run python src/handler.py
PYTHONPATH=src RUNPOD_TEST_INPUT="$(cat test_hf_input.json)" uv run python src/handler.py
# Test handler locally with test*.json
make test-handler
```

### Submodule Management
@@ -122,6 +118,14 @@ The handler automatically detects and utilizes `/runpod-volume` for persistent w
- **Optimized Resource Usage**: Shared caches across multiple endpoints while maintaining isolation
- **ML Model Efficiency**: Large HF models cached on volume prevent "No space left on device" errors

### HuggingFace Model Acceleration
The system automatically leverages HuggingFace's native acceleration features:
- **hf_transfer**: Accelerated downloads for large model files when available
- **hf_xet**: Automatic chunk-level deduplication and incremental downloads (huggingface_hub>=0.32.0)
- **Native Integration**: Uses HF Hub's `snapshot_download()` for optimal caching and acceleration
- **Transparent Operation**: No code changes needed - acceleration is automatic when repositories support it
- **Token Support**: Configured via `HF_TOKEN` environment variable for private repositories
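
A minimal usage sketch of the behavior described above, assuming the `huggingface_hub` API; the model id and cache path are illustrative examples, not values taken from this PR:

```python
import os

# hf_transfer is opted into via this environment variable; set it before
# huggingface_hub reads its configuration.
os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")

from huggingface_hub import snapshot_download

# Illustrative values: repo_id and cache_dir are examples only.
local_dir = snapshot_download(
    repo_id="bert-base-uncased",
    cache_dir="/runpod-volume/.cache/huggingface",  # persistent volume cache
    token=os.environ.get("HF_TOKEN"),  # only needed for private repositories
)
print(f"Model snapshot cached at: {local_dir}")
```

With `huggingface_hub>=0.32.0`, xet-based chunk deduplication is applied automatically when the repository supports it, so no additional code is required.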

## Configuration

### Environment Variables
Expand Down Expand Up @@ -160,11 +164,6 @@ make test-integration # Run integration tests only
make test-coverage # Run tests with coverage report
make test-fast # Run tests with fail-fast mode
make test-handler # Test handler locally with all test_*.json files (same as CI)

# Test handler locally with specific test files
PYTHONPATH=src RUNPOD_TEST_INPUT="$(cat test_input.json)" uv run python src/handler.py
PYTHONPATH=src RUNPOD_TEST_INPUT="$(cat test_class_input.json)" uv run python src/handler.py
PYTHONPATH=src RUNPOD_TEST_INPUT="$(cat test_hf_input.json)" uv run python src/handler.py
```

### Testing Framework
@@ -261,3 +260,8 @@ Configure these in GitHub repository settings:

### Docker Guidelines
- Docker container should never refer to src/

- Always run `make quality-check` before declaring the work finished
- Always use `git mv` when moving existing files around

- Run `make test-handler` to check the test files. Do not run them one by one with commands like `Bash(env RUNPOD_TEST_INPUT="$(cat test_input.json)" PYTHONPATH=. uv run python handler.py)`
9 changes: 5 additions & 4 deletions Dockerfile
@@ -10,7 +10,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
&& chmod +x /usr/local/bin/uv

# Copy app code and install dependencies
COPY README.md src/* pyproject.toml uv.lock test_*.json test-handler.sh ./
COPY README.md src/* pyproject.toml uv.lock ./
RUN uv sync


@@ -19,11 +19,12 @@ FROM pytorch/pytorch:2.2.0-cuda12.1-cudnn8-runtime

WORKDIR /app

# Install nala for system package acceleration in runtime stage
RUN apt-get update && apt-get install -y --no-install-recommends nala \
&& rm -rf /var/lib/apt/lists/*

# Copy app and uv binary from builder
COPY --from=builder /app /app
COPY --from=builder /usr/local/bin/uv /usr/local/bin/uv

# Clean up any unnecessary system tools
RUN rm -rf /var/lib/apt/lists/*

CMD ["uv", "run", "handler.py"]
4 changes: 2 additions & 2 deletions Dockerfile-cpu
@@ -11,7 +11,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
&& chmod +x /usr/local/bin/uv

# Copy app files and install deps
COPY README.md src/* pyproject.toml uv.lock test_*.json test-handler.sh ./
COPY README.md src/* pyproject.toml uv.lock ./
RUN uv sync

# Stage 2: Runtime stage
@@ -21,7 +21,7 @@ WORKDIR /app

# Install runtime dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
curl ca-certificates \
curl ca-certificates nala \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

6 changes: 3 additions & 3 deletions Makefile
@@ -68,7 +68,7 @@ test-fast: # Run tests with fast-fail mode
uv run pytest tests/ -v -x --tb=short

test-handler: # Test handler locally with all test_*.json files
./test-handler.sh
cd src && ./test-handler.sh

# Smoke Tests (local on Mac OS)

@@ -97,7 +97,7 @@ format-check: # Check code formatting

# Type checking
typecheck: # Check types with mypy
uv run mypy .
uv run mypy src/

# Quality gates (used in CI)
quality-check: format-check lint typecheck test-coverage
quality-check: format-check lint typecheck test-coverage test-handler
47 changes: 24 additions & 23 deletions pyproject.toml
@@ -7,7 +7,10 @@ requires-python = ">=3.9,<3.13"
dependencies = [
"cloudpickle>=3.1.1",
"pydantic>=2.11.4",
"requests>=2.25.0",
"runpod",
"hf_transfer>=0.1.0",
"huggingface_hub>=0.32.0",
]

[dependency-groups]
@@ -18,6 +21,7 @@ dev = [
"pytest-asyncio>=0.24.0",
"ruff>=0.8.0",
"mypy>=1.11.0",
"types-requests>=2.25.0",
]

[tool.pytest.ini_options]
@@ -48,40 +52,37 @@ filterwarnings = [
"ignore::pytest.PytestUnknownMarkWarning"
]

[tool.ruff]
# Exclude tetra-rp directory since it's a separate repository
exclude = [
"tetra-rp/",
]

[tool.mypy]
# Basic configuration
python_version = "3.9"
warn_return_any = true
warn_unused_configs = true
disallow_untyped_defs = false # Start lenient, can be stricter later
disallow_incomplete_defs = false
check_untyped_defs = true

# Import discovery
mypy_path = "src"
mypy_path = ["src"]
explicit_package_bases = true
namespace_packages = true

# Error output
check_untyped_defs = true
disallow_any_generics = true
disallow_untyped_defs = false
warn_redundant_casts = true
warn_unused_ignores = true
warn_return_any = true
strict_optional = true
show_error_codes = true
show_column_numbers = true
pretty = true

# Exclude directories
exclude = [
"tetra-rp/",
"tests/", # Start by excluding tests, can add later
]

# Per-module options
[[tool.mypy.overrides]]
module = [
"runpod.*",
"cloudpickle.*",
"cloudpickle",
"runpod",
"transformers",
"hf_transfer",
"huggingface_hub",
]
ignore_missing_imports = true

[tool.ruff]
# Exclude tetra-rp directory since it's a separate repository
exclude = [
"tetra-rp/",
]
1 change: 1 addition & 0 deletions src/__init__.py
@@ -0,0 +1 @@
"""Worker Tetra package."""
2 changes: 1 addition & 1 deletion src/class_executor.py
@@ -18,7 +18,7 @@ def __init__(self, workspace_manager):
super().__init__(workspace_manager)
# Instance registry for persistent class instances
self.class_instances: Dict[str, Any] = {}
self.instance_metadata: Dict[str, Dict] = {}
self.instance_metadata: Dict[str, Dict[str, Any]] = {}

def execute(self, request: FunctionRequest) -> FunctionResponse:
"""Execute class method - required by BaseExecutor interface."""
72 changes: 72 additions & 0 deletions src/constants.py
@@ -20,3 +20,75 @@

RUNTIMES_DIR_NAME = "runtimes"
"""Name of the runtimes directory containing per-endpoint workspaces."""

# Download Acceleration Settings
MIN_SIZE_FOR_ACCELERATION_MB = 10
"""Minimum file size in MB to trigger download acceleration."""

DOWNLOAD_TIMEOUT_SECONDS = 600
"""Default timeout for download operations in seconds."""

# New download accelerator settings
HF_TRANSFER_ENABLED = True
"""Enable hf_transfer for fresh HuggingFace downloads."""


# Size Conversion Constants
BYTES_PER_MB = 1024 * 1024
"""Number of bytes in a megabyte."""

MB_SIZE_THRESHOLD = 1 * BYTES_PER_MB
"""Minimum file size threshold for considering acceleration (1MB)."""

# HuggingFace Model Patterns
LARGE_HF_MODEL_PATTERNS = [
"albert-large",
"albert-xlarge",
"bart-large",
"bert-large",
"bert-base",
"codegen",
"diffusion",
"distilbert-base",
"falcon",
"gpt",
"hubert",
"llama",
"mistral",
"mpt",
"pegasus",
"roberta-large",
"roberta-base",
"santacoder",
"stable-diffusion",
"t5",
"vae",
"wav2vec2",
"whisper",
"xlm-roberta",
"xlnet",
]
"""List of HuggingFace model patterns that benefit from download acceleration."""

# System Package Acceleration with Nala
LARGE_SYSTEM_PACKAGES = [
"build-essential",
"cmake",
"cuda-toolkit",
"curl",
"g++",
"gcc",
"git",
"libssl-dev",
"nvidia-cuda-dev",
"python3-dev",
"wget",
]
"""List of system packages that benefit from nala's accelerated installation."""

NALA_CHECK_CMD = ["which", "nala"]
"""Command to check if nala is available."""

# Logging Configuration
LOG_FORMAT = "%(asctime)s - %(levelname)s - %(name)s - %(message)s"
"""Standard log format string used across the application."""