diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 2c862e8..afff26a 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -99,22 +99,7 @@ jobs:
         run: make setup
 
       - name: Test local handler execution
-        run: |
-          echo "Testing handler with all test_*.json files..."
-          passed=0
-          total=0
-          for test_file in test_*.json; do
-            total=$((total + 1))
-            echo "Testing with $test_file..."
-            if timeout 30s env PYTHONPATH=src RUNPOD_TEST_INPUT="$(cat "$test_file")" uv run python src/handler.py >/dev/null 2>&1; then
-              echo "✓ $test_file: PASSED"
-              passed=$((passed + 1))
-            else
-              echo "✗ $test_file: FAILED"
-              exit 1
-            fi
-          done
-          echo "All $passed/$total handler tests passed!"
+        run: make test-handler
 
   release:
     runs-on: ubuntu-latest
diff --git a/CLAUDE.md b/CLAUDE.md
index c4be927..1de083f 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -68,12 +68,8 @@ make build-cpu               # Build CPU-only Docker image
 
 ### Local Testing  
 ```bash
-# Test handler locally with test_input.json
-PYTHONPATH=src RUNPOD_TEST_INPUT="$(cat test_input.json)" uv run python src/handler.py
-
-# Test with other test files
-PYTHONPATH=src RUNPOD_TEST_INPUT="$(cat test_class_input.json)" uv run python src/handler.py
-PYTHONPATH=src RUNPOD_TEST_INPUT="$(cat test_hf_input.json)" uv run python src/handler.py
+# Test handler locally with all test_*.json files
+make test-handler
 ```
 
 ### Submodule Management
@@ -122,6 +118,14 @@ The handler automatically detects and utilizes `/runpod-volume` for persistent w
 - **Optimized Resource Usage**: Shared caches across multiple endpoints while maintaining isolation
 - **ML Model Efficiency**: Large HF models cached on volume prevent "No space left on device" errors
 
+### HuggingFace Model Acceleration
+The system automatically leverages HuggingFace's native acceleration features:
+- **hf_transfer**: Accelerated downloads for large model files when available
+- **hf_xet**: Automatic chunk-level deduplication and incremental downloads (huggingface_hub>=0.32.0)
+- **Native Integration**: Uses HF Hub's `snapshot_download()` for optimal caching and acceleration
+- **Transparent Operation**: No code changes needed - acceleration is automatic when repositories support it
+- **Token Support**: Configured via `HF_TOKEN` environment variable for private repositories
+
 ## Configuration
 
 ### Environment Variables
@@ -160,11 +164,6 @@ make test-integration        # Run integration tests only
 make test-coverage           # Run tests with coverage report
 make test-fast               # Run tests with fail-fast mode
 make test-handler            # Test handler locally with all test_*.json files (same as CI)
-
-# Test handler locally with specific test files
-PYTHONPATH=src RUNPOD_TEST_INPUT="$(cat test_input.json)" uv run python src/handler.py
-PYTHONPATH=src RUNPOD_TEST_INPUT="$(cat test_class_input.json)" uv run python src/handler.py
-PYTHONPATH=src RUNPOD_TEST_INPUT="$(cat test_hf_input.json)" uv run python src/handler.py
 ```
 
 ### Testing Framework
@@ -261,3 +260,8 @@ Configure these in GitHub repository settings:
 
 ### Docker Guidelines
 - Docker container should never refer to src/
+
+- Always run `make quality-check` before declaring your work finished
+- Always use `git mv` when moving existing files around
+
+- Run `make test-handler` to check all test files. Do not run them one by one, e.g. `Bash(env RUNPOD_TEST_INPUT="$(cat test_input.json)" PYTHONPATH=. uv run python handler.py)`
diff --git a/Dockerfile b/Dockerfile
index 0bb269d..6323086 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -10,7 +10,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
  && chmod +x /usr/local/bin/uv
 
 # Copy app code and install dependencies
-COPY README.md src/* pyproject.toml uv.lock test_*.json test-handler.sh ./
+COPY README.md src/* pyproject.toml uv.lock ./
 RUN uv sync
 
 
@@ -19,11 +19,12 @@ FROM pytorch/pytorch:2.2.0-cuda12.1-cudnn8-runtime
 
 WORKDIR /app
 
+# Install nala for system package acceleration in runtime stage
+RUN apt-get update && apt-get install -y --no-install-recommends nala \
+ && rm -rf /var/lib/apt/lists/*
+
 # Copy app and uv binary from builder
 COPY --from=builder /app /app
 COPY --from=builder /usr/local/bin/uv /usr/local/bin/uv
 
-# Clean up any unnecessary system tools
-RUN rm -rf /var/lib/apt/lists/*
-
 CMD ["uv", "run", "handler.py"]
\ No newline at end of file
diff --git a/Dockerfile-cpu b/Dockerfile-cpu
index e0911ff..1ffe7d3 100644
--- a/Dockerfile-cpu
+++ b/Dockerfile-cpu
@@ -11,7 +11,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
  && chmod +x /usr/local/bin/uv
 
 # Copy app files and install deps
-COPY README.md src/* pyproject.toml uv.lock test_*.json test-handler.sh ./
+COPY README.md src/* pyproject.toml uv.lock ./
 RUN uv sync
 
 # Stage 2: Runtime stage
@@ -21,7 +21,7 @@ WORKDIR /app
 
 # Install runtime dependencies
 RUN apt-get update && apt-get install -y --no-install-recommends \
-    curl ca-certificates \
+    curl ca-certificates nala \
  && apt-get clean \
  && rm -rf /var/lib/apt/lists/*
 
diff --git a/Makefile b/Makefile
index 288b40d..c8afdf5 100644
--- a/Makefile
+++ b/Makefile
@@ -68,7 +68,7 @@ test-fast: # Run tests with fast-fail mode
 	uv run pytest tests/ -v -x --tb=short
 
 test-handler: # Test handler locally with all test_*.json files
-	./test-handler.sh
+	cd src && ./test-handler.sh
 
 # Smoke Tests (local on Mac OS)
 
@@ -97,7 +97,7 @@ format-check: # Check code formatting
 
 # Type checking
 typecheck: # Check types with mypy
-	uv run mypy .
+	uv run mypy src/
 
 # Quality gates (used in CI)
-quality-check: format-check lint typecheck test-coverage
+quality-check: format-check lint typecheck test-coverage test-handler
diff --git a/pyproject.toml b/pyproject.toml
index 2288685..d503d21 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,10 @@ requires-python = ">=3.9,<3.13"
 dependencies = [
     "cloudpickle>=3.1.1",
     "pydantic>=2.11.4",
+    "requests>=2.25.0",
     "runpod",
+    "hf_transfer>=0.1.0",
+    "huggingface_hub>=0.32.0",
 ]
 
 [dependency-groups]
@@ -18,6 +21,7 @@ dev = [
     "pytest-asyncio>=0.24.0",
     "ruff>=0.8.0",
     "mypy>=1.11.0",
+    "types-requests>=2.25.0",
 ]
 
 [tool.pytest.ini_options]
@@ -48,40 +52,37 @@ filterwarnings = [
     "ignore::pytest.PytestUnknownMarkWarning"
 ]
 
-[tool.ruff]
-# Exclude tetra-rp directory since it's a separate repository
-exclude = [
-    "tetra-rp/",
-]
-
 [tool.mypy]
-# Basic configuration
 python_version = "3.9"
-warn_return_any = true
-warn_unused_configs = true
-disallow_untyped_defs = false  # Start lenient, can be stricter later
-disallow_incomplete_defs = false
-check_untyped_defs = true
-
-# Import discovery
-mypy_path = "src"
+mypy_path = ["src"]
+explicit_package_bases = true
 namespace_packages = true
-
-# Error output
+check_untyped_defs = true
+disallow_any_generics = true
+disallow_untyped_defs = false
+warn_redundant_casts = true
+warn_unused_ignores = true
+warn_return_any = true
+strict_optional = true
 show_error_codes = true
 show_column_numbers = true
 pretty = true
-
-# Exclude directories
 exclude = [
     "tetra-rp/",
-    "tests/",  # Start by excluding tests, can add later
 ]
 
-# Per-module options
 [[tool.mypy.overrides]]
 module = [
-    "runpod.*",
-    "cloudpickle.*",
+    "cloudpickle",
+    "runpod",
+    "transformers",
+    "hf_transfer",
+    "huggingface_hub",
 ]
 ignore_missing_imports = true
+
+[tool.ruff]
+# Exclude tetra-rp directory since it's a separate repository
+exclude = [
+    "tetra-rp/",
+]
diff --git a/src/__init__.py b/src/__init__.py
new file mode 100644
index 0000000..8ae010c
--- /dev/null
+++ b/src/__init__.py
@@ -0,0 +1 @@
+"""Worker Tetra package."""
diff --git a/src/class_executor.py b/src/class_executor.py
index 46fa81a..4a3b656 100644
--- a/src/class_executor.py
+++ b/src/class_executor.py
@@ -18,7 +18,7 @@ def __init__(self, workspace_manager):
         super().__init__(workspace_manager)
         # Instance registry for persistent class instances
         self.class_instances: Dict[str, Any] = {}
-        self.instance_metadata: Dict[str, Dict] = {}
+        self.instance_metadata: Dict[str, Dict[str, Any]] = {}
 
     def execute(self, request: FunctionRequest) -> FunctionResponse:
         """Execute class method - required by BaseExecutor interface."""
diff --git a/src/constants.py b/src/constants.py
index 53fd4f7..ee00120 100644
--- a/src/constants.py
+++ b/src/constants.py
@@ -20,3 +20,75 @@
 
 RUNTIMES_DIR_NAME = "runtimes"
 """Name of the runtimes directory containing per-endpoint workspaces."""
+
+# Download Acceleration Settings
+MIN_SIZE_FOR_ACCELERATION_MB = 10
+"""Minimum file size in MB to trigger download acceleration."""
+
+DOWNLOAD_TIMEOUT_SECONDS = 600
+"""Default timeout for download operations in seconds."""
+
+# hf_transfer toggle (consulted by HfTransferDownloader._check_hf_transfer)
+HF_TRANSFER_ENABLED = True
+"""Enable hf_transfer for fresh HuggingFace downloads."""
+
+
+# Size Conversion Constants
+BYTES_PER_MB = 1024 * 1024
+"""Number of bytes in a megabyte."""
+
+MB_SIZE_THRESHOLD = 1 * BYTES_PER_MB
+"""Minimum file size threshold for considering acceleration (1MB)."""
+
+# HuggingFace Model Patterns
+LARGE_HF_MODEL_PATTERNS = [
+    "albert-large",
+    "albert-xlarge",
+    "bart-large",
+    "bert-large",
+    "bert-base",
+    "codegen",
+    "diffusion",
+    "distilbert-base",
+    "falcon",
+    "gpt",
+    "hubert",
+    "llama",
+    "mistral",
+    "mpt",
+    "pegasus",
+    "roberta-large",
+    "roberta-base",
+    "santacoder",
+    "stable-diffusion",
+    "t5",
+    "vae",
+    "wav2vec2",
+    "whisper",
+    "xlm-roberta",
+    "xlnet",
+]
+"""List of HuggingFace model patterns that benefit from download acceleration."""
+
+# System Package Acceleration with Nala
+LARGE_SYSTEM_PACKAGES = [
+    "build-essential",
+    "cmake",
+    "cuda-toolkit",
+    "curl",
+    "g++",
+    "gcc",
+    "git",
+    "libssl-dev",
+    "nvidia-cuda-dev",
+    "python3-dev",
+    "wget",
+]
+"""List of system packages that benefit from nala's accelerated installation."""
+
+NALA_CHECK_CMD = ["which", "nala"]
+"""Command to check if nala is available."""
+
+# Logging Configuration
+LOG_FORMAT = "%(asctime)s - %(levelname)s - %(name)s - %(message)s"
+"""Standard log format string used across the application."""
diff --git a/src/dependency_installer.py b/src/dependency_installer.py
index 8f15c81..1b9b0b9 100644
--- a/src/dependency_installer.py
+++ b/src/dependency_installer.py
@@ -2,9 +2,12 @@
 import subprocess
 import importlib
 import logging
+import asyncio
 from typing import List, Dict
 
 from remote_execution import FunctionResponse
+from download_accelerator import DownloadAccelerator
+from constants import LARGE_SYSTEM_PACKAGES, NALA_CHECK_CMD
 
 
 class DependencyInstaller:
@@ -13,10 +16,21 @@ class DependencyInstaller:
     def __init__(self, workspace_manager):
         self.workspace_manager = workspace_manager
         self.logger = logging.getLogger(__name__)
+        self.download_accelerator = DownloadAccelerator(workspace_manager)
+        self._nala_available = None  # Cache nala availability check
 
-    def install_system_dependencies(self, packages: List[str]) -> FunctionResponse:
+    def install_system_dependencies(
+        self, packages: List[str], accelerate_downloads: bool = True
+    ) -> FunctionResponse:
         """
-        Install system packages using apt-get.
+        Install system packages using nala (accelerated) or apt-get (standard).
+
+        Args:
+            packages: List of system package names
+            accelerate_downloads: Whether to use nala for accelerated downloads
+
+        Returns:
+            FunctionResponse: Object indicating success or failure with details
         """
         if not packages:
             return FunctionResponse(
@@ -25,59 +39,26 @@ def install_system_dependencies(self, packages: List[str]) -> FunctionResponse:
 
         self.logger.info(f"Installing system dependencies: {packages}")
 
-        try:
-            # Update package list first
-            update_process = subprocess.Popen(
-                ["apt-get", "update"],
-                stdout=subprocess.PIPE,
-                stderr=subprocess.PIPE,
-            )
-            update_stdout, update_stderr = update_process.communicate()
+        # Check if we should use accelerated installation with nala
+        large_packages = self._identify_large_system_packages(packages)
 
-            if update_process.returncode != 0:
-                return FunctionResponse(
-                    success=False,
-                    error="Error updating package list",
-                    stdout=update_stderr.decode(),
-                )
-
-            # Install the packages
-            process = subprocess.Popen(
-                ["apt-get", "install", "-y", "--no-install-recommends"] + packages,
-                stdout=subprocess.PIPE,
-                stderr=subprocess.PIPE,
-                env={
-                    **os.environ,
-                    "DEBIAN_FRONTEND": "noninteractive",
-                },
-            )
-
-            stdout, stderr = process.communicate()
-
-            if process.returncode != 0:
-                return FunctionResponse(
-                    success=False,
-                    error="Error installing system packages",
-                    stdout=stderr.decode(),
-                )
-            else:
-                self.logger.info(f"Successfully installed system packages: {packages}")
-                return FunctionResponse(
-                    success=True,
-                    stdout=stdout.decode(),
-                )
-        except Exception as e:
-            return FunctionResponse(
-                success=False,
-                error=f"Exception during system package installation: {e}",
+        if accelerate_downloads and large_packages and self._check_nala_available():
+            self.logger.info(
+                f"Using nala for accelerated installation of system packages: {large_packages}"
             )
+            return self._install_system_with_nala(packages)
+        else:
+            return self._install_system_standard(packages)
 
-    def install_dependencies(self, packages: List[str]) -> FunctionResponse:
+    def install_dependencies(
+        self, packages: List[str], accelerate_downloads: bool = True
+    ) -> FunctionResponse:
         """
-        Install Python packages using uv with differential installation support.
+        Install Python packages using uv (pip is never used as a fallback).
 
         Args:
             packages: List of package names or package specifications
+            accelerate_downloads: Whether to skip packages already installed in the volume venv
         Returns:
             FunctionResponse: Object indicating success or failure with details
         """
@@ -86,37 +67,54 @@ def install_dependencies(self, packages: List[str]) -> FunctionResponse:
 
         self.logger.info(f"Installing dependencies: {packages}")
 
-        # If using volume, check which packages are already installed
-        if (
-            self.workspace_manager.has_runpod_volume
-            and self.workspace_manager.venv_path
-            and os.path.exists(self.workspace_manager.venv_path)
-        ):
-            # Validate virtual environment before using it
-            validation_result = self.workspace_manager._validate_virtual_environment()
-            if not validation_result.success:
-                self.logger.warning(
-                    f"Virtual environment is invalid: {validation_result.error}"
+        # Always use UV for Python package installation (more reliable than pip)
+        # When acceleration is enabled, use differential installation
+        if accelerate_downloads:
+            if (
+                self.workspace_manager.has_runpod_volume
+                and self.workspace_manager.venv_path
+                and os.path.exists(self.workspace_manager.venv_path)
+            ):
+                # Validate virtual environment before using it
+                validation_result = (
+                    self.workspace_manager._validate_virtual_environment()
                 )
-                self.logger.info("Reinitializing workspace...")
-                init_result = self.workspace_manager.initialize_workspace()
-                if not init_result.success:
+                if not validation_result.success:
+                    self.logger.warning(
+                        f"Virtual environment is invalid: {validation_result.error}"
+                    )
+                    self.logger.info("Reinitializing workspace...")
+                    init_result = self.workspace_manager.initialize_workspace()
+                    if not init_result.success:
+                        return FunctionResponse(
+                            success=False,
+                            error=f"Failed to reinitialize workspace: {init_result.error}",
+                        )
+                installed_packages = self._get_installed_packages()
+                packages_to_install = self._filter_packages_to_install(
+                    packages, installed_packages
+                )
+
+                if not packages_to_install:
                     return FunctionResponse(
-                        success=False,
-                        error=f"Failed to reinitialize workspace: {init_result.error}",
+                        success=True, stdout="All packages already installed"
                     )
-            installed_packages = self._get_installed_packages()
-            packages_to_install = self._filter_packages_to_install(
-                packages, installed_packages
-            )
 
-            if not packages_to_install:
-                return FunctionResponse(
-                    success=True, stdout="All packages already installed"
-                )
+                packages = packages_to_install
 
-            packages = packages_to_install
+        # Always use UV (works reliably with virtual environments)
+        return self._install_with_uv(packages)
 
+    def _install_with_uv(self, packages: List[str]) -> FunctionResponse:
+        """
+        Install packages using UV package manager
+
+        Args:
+            packages: Packages to install
+
+        Returns:
+            FunctionResponse with installation result
+        """
         try:
             # Prepare environment for virtual environment usage
             env = os.environ.copy()
@@ -127,7 +125,7 @@ def install_dependencies(self, packages: List[str]) -> FunctionResponse:
                 env["VIRTUAL_ENV"] = self.workspace_manager.venv_path
 
             # Use uv pip to install the packages
-            command = ["uv", "pip", "install", "--no-cache-dir"] + packages
+            command = ["uv", "pip", "install"] + packages
             process = subprocess.Popen(
                 command,
                 stdout=subprocess.PIPE,
@@ -211,3 +209,201 @@ def _filter_packages_to_install(
                 packages_to_install.append(package)
 
         return packages_to_install
+
+    def _check_nala_available(self) -> bool:
+        """
+        Probe for nala via NALA_CHECK_CMD once and reuse the cached answer afterwards.
+
+        Returns:
+            True if the probe command exited with status 0, False otherwise
+        """
+        if self._nala_available is None:
+            try:
+                process = subprocess.Popen(
+                    NALA_CHECK_CMD,
+                    stdout=subprocess.PIPE,
+                    stderr=subprocess.PIPE,
+                )
+                process.communicate()
+                self._nala_available = process.returncode == 0
+
+                if self._nala_available:
+                    self.logger.debug(
+                        "nala is available for accelerated system package installation"
+                    )
+                else:
+                    self.logger.debug("nala is not available, falling back to apt-get")
+
+            except Exception:
+                self._nala_available = False
+                self.logger.debug(
+                    "nala availability check failed, falling back to apt-get"
+                )
+
+        return self._nala_available
+
+    def _identify_large_system_packages(self, packages: List[str]) -> List[str]:
+        """
+        Pick out the requested packages that match a LARGE_SYSTEM_PACKAGES entry.
+
+        Args:
+            packages: System package names to screen
+
+        Returns:
+            Names containing any listed entry as a substring, in input order
+        """
+        return [
+            candidate
+            for candidate in packages
+            if any(marker in candidate for marker in LARGE_SYSTEM_PACKAGES)
+        ]
+
+    def _install_system_with_nala(self, packages: List[str]) -> FunctionResponse:
+        """
+        Install system packages with nala, deferring to apt-get on any failure.
+
+        Args:
+            packages: System packages to install
+
+        Returns:
+            FunctionResponse from nala on success, else the apt-get fallback result
+        """
+        try:
+            # Update package list first with nala
+            self.logger.info("Updating package list with nala")
+            update_process = subprocess.Popen(
+                ["nala", "update"],
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+            )
+            update_stdout, update_stderr = update_process.communicate()
+
+            if update_process.returncode != 0:
+                self.logger.warning(
+                    "nala update failed, falling back to standard installation"
+                )
+                return self._install_system_standard(packages)
+
+            # Same flags as the apt-get path so both install an identical package set
+            self.logger.info("Installing packages with nala acceleration")
+            process = subprocess.Popen(
+                ["nala", "install", "-y", "--no-install-recommends"] + packages,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                env={
+                    **os.environ,
+                    "DEBIAN_FRONTEND": "noninteractive",
+                },
+            )
+
+            stdout, stderr = process.communicate()
+
+            if process.returncode != 0:
+                self.logger.warning(
+                    "nala installation failed, falling back to standard installation"
+                )
+                return self._install_system_standard(packages)
+            else:
+                self.logger.info(
+                    f"Successfully installed system packages with nala: {packages}"
+                )
+                return FunctionResponse(
+                    success=True,
+                    stdout=f"Installed with nala acceleration: {stdout.decode()}",
+                )
+        except Exception as e:
+            self.logger.warning(
+                f"nala installation failed with exception, falling back to standard: {e}"
+            )
+            return self._install_system_standard(packages)
+
+    def _install_system_standard(self, packages: List[str]) -> FunctionResponse:
+        """
+        Install system packages with apt-get: refresh the package list, then install.
+
+        Args:
+            packages: System packages to install
+
+        Returns:
+            FunctionResponse; when a command exits non-zero, stdout carries its stderr text
+        """
+        try:
+            # Update package list first
+            update_process = subprocess.Popen(
+                ["apt-get", "update"],
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+            )
+            update_stdout, update_stderr = update_process.communicate()
+
+            if update_process.returncode != 0:
+                return FunctionResponse(
+                    success=False,
+                    error="Error updating package list",
+                    stdout=update_stderr.decode(),
+                )
+
+            # Install with DEBIAN_FRONTEND=noninteractive and without recommended packages
+            process = subprocess.Popen(
+                ["apt-get", "install", "-y", "--no-install-recommends"] + packages,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                env={
+                    **os.environ,
+                    "DEBIAN_FRONTEND": "noninteractive",
+                },
+            )
+
+            stdout, stderr = process.communicate()
+
+            if process.returncode != 0:
+                return FunctionResponse(
+                    success=False,
+                    error="Error installing system packages",
+                    stdout=stderr.decode(),
+                )
+            else:
+                self.logger.info(f"Successfully installed system packages: {packages}")
+                return FunctionResponse(
+                    success=True,
+                    stdout=stdout.decode(),
+                )
+        except Exception as e:
+            return FunctionResponse(
+                success=False,
+                error=f"Exception during system package installation: {e}",
+            )
+
+    async def install_system_dependencies_async(
+        self, packages: List[str], accelerate_downloads: bool = True
+    ) -> FunctionResponse:
+        """
+        Run install_system_dependencies in a worker thread via asyncio.to_thread.
+
+        Args:
+            packages: List of system package names
+            accelerate_downloads: Whether nala may be used (needs a listed large package and nala present)
+
+        Returns:
+            FunctionResponse: Object indicating success or failure with details
+        """
+        return await asyncio.to_thread(
+            self.install_system_dependencies, packages, accelerate_downloads
+        )
+
+    async def install_dependencies_async(
+        self, packages: List[str], accelerate_downloads: bool = True
+    ) -> FunctionResponse:
+        """
+        Run install_dependencies in a worker thread via asyncio.to_thread.
+
+        Args:
+            packages: List of package names or package specifications
+            accelerate_downloads: Whether to skip packages already installed in the volume venv
+
+        Returns:
+            FunctionResponse: Object indicating success or failure with details
+        """
+        return await asyncio.to_thread(
+            self.install_dependencies, packages, accelerate_downloads
+        )
diff --git a/src/download_accelerator.py b/src/download_accelerator.py
new file mode 100644
index 0000000..9f59385
--- /dev/null
+++ b/src/download_accelerator.py
@@ -0,0 +1,266 @@
+"""
+Download acceleration using hf_transfer for optimal HuggingFace model downloads.
+
+This module provides accelerated download capabilities optimized for HuggingFace models:
+- hf_transfer for accelerated downloads when available
+- hf_xet acceleration is automatically handled by HuggingFace Hub (huggingface_hub>=0.32.0)
+- Standard HF hub as reliable fallback
+"""
+
+import os
+import time
+import logging
+from dataclasses import dataclass
+from typing import Optional
+
+from remote_execution import FunctionResponse
+from constants import (
+    MIN_SIZE_FOR_ACCELERATION_MB,
+    HF_TRANSFER_ENABLED,
+)
+
+
+@dataclass
+class DownloadMetrics:
+    """Performance metrics for one download operation."""
+
+    method: str  # label of the download path used, e.g. "hf_transfer"
+    file_size_bytes: int
+    total_time_seconds: float
+    average_speed_mbps: float  # megabits per second, not megabytes
+    success: bool
+    error_message: Optional[str] = None  # failure description; defaults to None
+
+    @property
+    def speed_mb_per_sec(self) -> float:
+        """Average speed in megabytes per second (average_speed_mbps / 8)."""
+        return self.average_speed_mbps / 8.0
+
+    @property
+    def file_size_mb(self) -> float:
+        """File size in megabytes (1 MB = 1024 * 1024 bytes)."""
+        return self.file_size_bytes / (1024 * 1024)
+
+
+class HfTransferDownloader:
+    """Downloads single files from huggingface.co through hf_hub_download."""
+
+    def __init__(self):
+        self.logger = logging.getLogger(__name__)
+        self.hf_transfer_available = self._check_hf_transfer()
+
+    def _check_hf_transfer(self) -> bool:
+        """Return HF_TRANSFER_ENABLED if hf_transfer is importable, else False."""
+        import importlib.util
+
+        if importlib.util.find_spec("hf_transfer") is not None:
+            return HF_TRANSFER_ENABLED
+        else:
+            self.logger.debug("hf_transfer not available")
+            return False
+
+    def download(
+        self,
+        url: str,
+        output_path: str,
+        show_progress: bool = False,
+    ) -> DownloadMetrics:
+        """
+        Fetch one file from a huggingface.co "resolve" URL via hf_hub_download.
+        Raises RuntimeError up front when hf_transfer is unavailable.
+
+        Args:
+            url: https://huggingface.co/{model_id}/resolve/{revision}/{filename}
+            output_path: Local file path to save to
+            show_progress: Currently unused by this implementation
+        Returns:
+            DownloadMetrics; download errors are reported as success=False
+        """
+        if not self.hf_transfer_available:
+            raise RuntimeError("hf_transfer not available")
+
+        start_time = time.time()
+
+        try:
+            # huggingface_hub reads this flag from the real process environment;
+            # a value written to a copied dict would never reach the library.
+            # NOTE(review): the hub may read the flag only once, at import time,
+            # so it can be missed if huggingface_hub was imported earlier — confirm.
+            os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
+
+            # Hand the token over explicitly; None leaves the hub's defaults in place
+            hf_token = os.environ.get("HF_TOKEN") or None
+
+            # Use hf_transfer via huggingface_hub
+            from huggingface_hub import hf_hub_download
+
+            # Extract model_id and filename from URL
+            # URL format: https://huggingface.co/{model_id}/resolve/{revision}/{filename}
+            if "huggingface.co" in url and "/resolve/" in url:
+                parts = url.replace("https://huggingface.co/", "").split("/resolve/")
+                model_id = parts[0]
+                revision_and_filename = parts[1].split("/", 1)
+                revision = revision_and_filename[0]
+                filename = revision_and_filename[1]
+
+                # A bare filename has no directory part; use the current directory
+                target_dir = os.path.dirname(output_path) or "."
+                os.makedirs(target_dir, exist_ok=True)
+
+                downloaded_path = hf_hub_download(
+                    repo_id=model_id,
+                    filename=filename,
+                    revision=revision,
+                    cache_dir=target_dir,
+                    local_dir=target_dir,
+                    token=hf_token,
+                )
+
+                # Move to expected location if needed
+                if downloaded_path != output_path:
+                    import shutil
+
+                    shutil.move(downloaded_path, output_path)
+
+            else:
+                # Not a resolve URL: caught below and returned as a failed DownloadMetrics
+                raise ValueError("hf_transfer only supports HuggingFace URLs")
+
+            end_time = time.time()
+            file_size = (
+                os.path.getsize(output_path) if os.path.exists(output_path) else 0
+            )
+            total_time = end_time - start_time
+
+            if total_time > 0 and file_size > 0:
+                bits_per_second = (file_size * 8) / total_time
+                avg_speed = bits_per_second / (1024 * 1024)
+            else:
+                avg_speed = 0.0
+
+            self.logger.info(
+                f"Downloaded {file_size / (1024 * 1024):.1f}MB in {total_time:.1f}s "
+                f"({avg_speed / 8:.1f} MB/s) using hf_transfer"
+            )
+
+            return DownloadMetrics(
+                method="hf_transfer",
+                file_size_bytes=file_size,
+                total_time_seconds=total_time,
+                average_speed_mbps=avg_speed,
+                success=True,
+            )
+
+        except Exception as e:
+            self.logger.error(f"hf_transfer download failed: {str(e)}")
+            return DownloadMetrics(
+                method="hf_transfer",
+                file_size_bytes=0,
+                total_time_seconds=time.time() - start_time,
+                average_speed_mbps=0.0,
+                success=False,
+                error_message=str(e),
+            )
+
+
+class DownloadAccelerator:
+    """
+    Coordinates accelerated downloads of HuggingFace-hosted files via hf_transfer.
+
+    Note: hf_xet acceleration is now automatically handled by HuggingFace Hub
+    when using hf_hub_download() or snapshot_download() functions.
+    """
+
+    def __init__(self, workspace_manager=None):
+        self.workspace_manager = workspace_manager
+        self.logger = logging.getLogger(__name__)
+        self.hf_transfer_downloader = HfTransferDownloader()
+
+    def should_accelerate_download(
+        self, url: str, estimated_size_mb: float = 0
+    ) -> bool:
+        """
+        Decide whether a download is eligible for acceleration.
+
+        Args:
+            url: Download URL; eligible only if it contains "huggingface.co"
+            estimated_size_mb: Estimated file size in MB (does not change the result)
+
+        Returns:
+            True for any URL containing "huggingface.co", False otherwise
+        """
+        # Substring match: URLs without "huggingface.co" are never accelerated
+        if "huggingface.co" not in url:
+            return False
+
+        if estimated_size_mb >= MIN_SIZE_FOR_ACCELERATION_MB:
+            return True
+
+        # Files below the size threshold are accelerated too (same result as above)
+        return True
+
+    def is_file_cached(self, output_path: str) -> bool:
+        """Return True if output_path exists and has a non-zero size."""
+        return os.path.exists(output_path) and os.path.getsize(output_path) > 0
+
+    def download_with_fallback(
+        self,
+        url: str,
+        output_path: str,
+        estimated_size_mb: float = 0,
+        show_progress: bool = False,
+    ) -> FunctionResponse:
+        """
+        Download a file with hf_transfer when the URL is eligible.
+
+        Strategy:
+        1. Use hf_transfer for eligible HF URLs when hf_transfer is available
+        2. Otherwise return failure - let HF's native download handling work
+
+        Args:
+            url: URL to download
+            output_path: Local file path to write to
+            estimated_size_mb: Estimated size, passed to should_accelerate_download()
+            show_progress: Passed through to the hf_transfer downloader
+
+        Returns:
+            FunctionResponse; success=True only when the hf_transfer download succeeded
+        """
+        if not self.should_accelerate_download(url, estimated_size_mb):
+            self.logger.info(
+                f"Not accelerating download, letting HF handle natively: {url}"
+            )
+            return FunctionResponse(
+                success=False,
+                error="No acceleration available - defer to HF native handling",
+            )
+
+        # Strategy 1: Try hf_transfer (hf_xet is automatically used by HF Hub when available)
+        if self.hf_transfer_downloader.hf_transfer_available:
+            try:
+                self.logger.info(f"Using hf_transfer for download: {url}")
+                metrics = self.hf_transfer_downloader.download(
+                    url, output_path, show_progress=show_progress
+                )
+
+                if metrics.success:
+                    return FunctionResponse(
+                        success=True,
+                        stdout=f"Downloaded {metrics.file_size_mb:.1f}MB in {metrics.total_time_seconds:.1f}s "
+                        f"({metrics.speed_mb_per_sec:.1f} MB/s) using hf_transfer",
+                    )
+                else:
+                    self.logger.warning(
+                        f"hf_transfer download failed: {metrics.error_message}"
+                    )
+            except Exception as e:
+                self.logger.warning(f"hf_transfer download failed: {e}")
+
+        # Reached when hf_transfer is unavailable or its download did not succeed
+        self.logger.info(
+            f"No acceleration available for {url}, deferring to HF native handling"
+        )
+        return FunctionResponse(
+            success=False,
+            error="Acceleration not available - defer to HF native handling",
+        )
diff --git a/src/handler.py b/src/handler.py
index 31893a3..0cd0903 100644
--- a/src/handler.py
+++ b/src/handler.py
@@ -1,19 +1,21 @@
 import runpod
 import logging
 import sys
+from typing import Dict, Any
 
 from remote_execution import FunctionRequest, FunctionResponse
 from remote_executor import RemoteExecutor
+from constants import LOG_FORMAT
 
 
 logging.basicConfig(
     level=logging.DEBUG,  # or INFO for less verbose output
     stream=sys.stdout,  # send logs to stdout (so docker captures it)
-    format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
+    format=LOG_FORMAT,
 )
 
 
-async def handler(event: dict) -> dict:
+async def handler(event: Dict[str, Any]) -> Dict[str, Any]:
     """
     RunPod serverless function handler with dependency installation.
     """
diff --git a/src/hf_download_strategy.py b/src/hf_download_strategy.py
new file mode 100644
index 0000000..d8e1df0
--- /dev/null
+++ b/src/hf_download_strategy.py
@@ -0,0 +1,81 @@
+"""
+HuggingFace download strategy interface.
+
+Provides pluggable download strategies for HuggingFace models to allow
+switching between different acceleration methods and benchmarking performance.
+"""
+
+from abc import ABC, abstractmethod
+from typing import Dict, Any
+from remote_execution import FunctionResponse
+
+
+class HFDownloadStrategy(ABC):
+    """Interface that every pluggable HuggingFace download strategy implements."""
+
+    @abstractmethod
+    def download_model(self, model_id: str, revision: str = "main") -> FunctionResponse:
+        """
+        Download a HuggingFace model.
+
+        Args:
+            model_id: HuggingFace model identifier (e.g. 'gpt2')
+            revision: Model revision/branch (defaults to "main")
+
+        Returns:
+            FunctionResponse describing whether the download succeeded
+        """
+        pass
+
+    @abstractmethod
+    def is_model_cached(self, model_id: str, revision: str = "main") -> bool:
+        """
+        Check if model is already cached.
+
+        Args:
+            model_id: HuggingFace model identifier (e.g. 'gpt2')
+            revision: Model revision/branch (defaults to "main")
+
+        Returns:
+            True if model appears to be cached
+        """
+        pass
+
+    @abstractmethod
+    def get_cache_info(self, model_id: str) -> Dict[str, Any]:
+        """
+        Get cache information for a model.
+
+        Args:
+            model_id: HuggingFace model identifier (e.g. 'gpt2')
+
+        Returns:
+            Dictionary with cache information (keys are chosen by the implementation)
+        """
+        pass
+
+    @abstractmethod
+    def should_accelerate(self, model_id: str) -> bool:
+        """
+        Determine if model should use acceleration.
+
+        Args:
+            model_id: HuggingFace model identifier (e.g. 'gpt2')
+
+        Returns:
+            True if acceleration should be used
+        """
+        pass
+
+    @abstractmethod
+    def clear_model_cache(self, model_id: str) -> FunctionResponse:
+        """
+        Clear cache for a specific model.
+
+        Args:
+            model_id: HuggingFace model identifier (e.g. 'gpt2')
+
+        Returns:
+            FunctionResponse describing whether the cache was cleared
+        """
+        pass
diff --git a/src/hf_downloader_native.py b/src/hf_downloader_native.py
new file mode 100644
index 0000000..4e1f630
--- /dev/null
+++ b/src/hf_downloader_native.py
@@ -0,0 +1,175 @@
+"""
+Native HuggingFace downloader strategy.
+
+This strategy implements the current simplified approach using HF Hub's
+native snapshot_download() with built-in acceleration support.
+"""
+
+import logging
+from typing import Dict, Any
+
+from huggingface_hub import HfApi, snapshot_download
+from remote_execution import FunctionResponse
+from hf_download_strategy import HFDownloadStrategy
+from constants import LARGE_HF_MODEL_PATTERNS, BYTES_PER_MB
+
+
+class NativeHFDownloader(HFDownloadStrategy):
+    """Native HuggingFace downloader using HF Hub's built-in acceleration."""
+
+    def __init__(self, workspace_manager):
+        self.workspace_manager = workspace_manager
+        self.logger = logging.getLogger(__name__)
+        self.api = HfApi()
+
+        # HF will automatically use HF_HOME environment variable set by workspace_manager
+        # No need to manually manage cache directories
+
+    def should_accelerate(self, model_id: str) -> bool:
+        """
+        Determine if model should be pre-cached.
+        HF Hub automatically uses hf_transfer when available.
+
+        Args:
+            model_id: HuggingFace model identifier
+
+        Returns:
+            True if the lower-cased model_id contains any LARGE_HF_MODEL_PATTERNS entry
+        """
+        model_lower = model_id.lower()
+        return any(pattern in model_lower for pattern in LARGE_HF_MODEL_PATTERNS)
+
+    def download_model(self, model_id: str, revision: str = "main") -> FunctionResponse:
+        """
+        Pre-download HuggingFace model using HF Hub's native caching.
+
+        This method downloads the complete model snapshot to HF's standard cache
+        location, leveraging hf_transfer when available.
+
+        Args:
+            model_id: HuggingFace model identifier
+            revision: Model revision/branch
+
+        Returns:
+            FunctionResponse; success=True when cached or when no pre-caching is needed
+        """
+        if not self.should_accelerate(model_id):
+            return FunctionResponse(
+                success=True, stdout=f"Model {model_id} does not require pre-caching"
+            )
+
+        self.logger.info(f"Pre-caching model: {model_id}")
+
+        try:
+            # Use HF Hub's native snapshot download with acceleration
+            snapshot_path = snapshot_download(
+                repo_id=model_id,
+                revision=revision,
+                # HF automatically uses HF_HOME/HF_HUB_CACHE from environment
+                # and applies hf_transfer acceleration when available
+            )
+
+            return FunctionResponse(
+                success=True,
+                stdout=f"Successfully pre-cached model {model_id} to {snapshot_path}",
+            )
+
+        except Exception as e:
+            return FunctionResponse(
+                success=False,
+                error=f"Failed to pre-cache model {model_id}: {str(e)}",
+            )
+
+    def is_model_cached(self, model_id: str, revision: str = "main") -> bool:
+        """
+        Check if model is already cached using HF Hub's cache utilities.
+
+        Args:
+            model_id: HuggingFace model identifier
+            revision: Model revision/branch
+
+        Returns:
+            True if at least one key model file is present in the cache
+        """
+        try:
+            from huggingface_hub import try_to_load_from_cache
+
+            # Check for common model files that indicate a cached model
+            key_files = ["config.json", "pytorch_model.bin", "model.safetensors"]
+            for filename in key_files:
+                cached_path = try_to_load_from_cache(
+                    repo_id=model_id, filename=filename, revision=revision
+                )
+                if isinstance(cached_path, str):  # real path, not the no-exist sentinel
+                    return True
+
+            return False
+        except Exception as e:
+            self.logger.debug(f"Cache lookup failed for {model_id}: {e}")
+            return False
+
+    def get_cache_info(self, model_id: str) -> Dict[str, Any]:
+        """
+        Get cache information for a model using HF Hub utilities.
+
+        Args:
+            model_id: HuggingFace model identifier
+
+        Returns:
+            Dict with "cached", "cache_size_mb", "file_count" (and "cache_path" if cached)
+        """
+        try:
+            from huggingface_hub import scan_cache_dir
+
+            cache_info = scan_cache_dir()
+
+            # Find our specific model in the cache
+            for repo in cache_info.repos:
+                if repo.repo_id == model_id:
+                    return {
+                        "cached": True,
+                        "cache_size_mb": repo.size_on_disk / BYTES_PER_MB,
+                        # Repo-wide count, consistent with size_on_disk; revisions
+                        # is an unordered set, so indexing it picks an arbitrary one
+                        "file_count": repo.nb_files,
+                        "cache_path": str(repo.repo_path),
+                    }
+
+            return {"cached": False, "cache_size_mb": 0, "file_count": 0}
+        except Exception as e:
+            self.logger.debug(f"Cache scan failed for {model_id}: {e}")
+            return {"cached": False, "cache_size_mb": 0, "file_count": 0}
+
+    def clear_model_cache(self, model_id: str) -> FunctionResponse:
+        """
+        Clear cache for a specific model using HF Hub utilities.
+
+        Args:
+            model_id: HuggingFace model identifier
+
+        Returns:
+            FunctionResponse; success=True if cleared or if nothing was cached
+        """
+        try:
+            from huggingface_hub import scan_cache_dir
+
+            cache_info = scan_cache_dir()
+
+            for repo in cache_info.repos:
+                if repo.repo_id == model_id:
+                    # delete_revisions() takes commit hashes, not the repo id
+                    hashes = [rev.commit_hash for rev in repo.revisions]
+                    cache_info.delete_revisions(*hashes).execute()
+
+                    return FunctionResponse(
+                        success=True, stdout=f"Cleared cache for model {model_id}"
+                    )
+
+            return FunctionResponse(
+                success=True, stdout=f"No cache found for model {model_id}"
+            )
+
+        except Exception as e:
+            return FunctionResponse(
+                success=False, error=f"Failed to clear cache for {model_id}: {str(e)}"
+            )
diff --git a/src/hf_downloader_tetra.py b/src/hf_downloader_tetra.py
new file mode 100644
index 0000000..d9fa6ab
--- /dev/null
+++ b/src/hf_downloader_tetra.py
@@ -0,0 +1,270 @@
+"""
+Tetra HuggingFace downloader strategy.
+
+This strategy implements a custom acceleration logic with
+manual file enumeration and file-by-file downloads using
+hf_transfer and custom acceleration methods.
+"""
+
+import logging
+from typing import Dict, List, Any
+from pathlib import Path
+
+from huggingface_hub import HfApi
+from remote_execution import FunctionResponse
+from hf_download_strategy import HFDownloadStrategy
+from download_accelerator import DownloadAccelerator
+from constants import LARGE_HF_MODEL_PATTERNS, BYTES_PER_MB, MB_SIZE_THRESHOLD
+
+
+class TetraHFDownloader(HFDownloadStrategy):
+    """Custom Tetra HuggingFace downloader with manual acceleration logic."""
+
+    def __init__(self, workspace_manager):
+        self.workspace_manager = workspace_manager
+        self.logger = logging.getLogger(__name__)
+        self.download_accelerator = DownloadAccelerator(workspace_manager)
+        self.api = HfApi()
+
+        # Use workspace manager's HF cache if available
+        if workspace_manager and workspace_manager.hf_cache_path:
+            self.cache_dir = Path(workspace_manager.hf_cache_path)
+        else:
+            self.cache_dir = Path.home() / ".cache" / "huggingface"
+
+        self.cache_dir.mkdir(parents=True, exist_ok=True)
+
+    def get_model_files(
+        self, model_id: str, revision: str = "main"
+    ) -> List[Dict[str, Any]]:
+        """
+        Get list of files for a HuggingFace model using the HF Hub API.
+
+        Args:
+            model_id: HuggingFace model identifier (e.g., 'gpt2', 'microsoft/DialoGPT-medium')
+            revision: Model revision/branch (default: 'main')
+
+        Returns:
+            List of dicts with "path", "size" (0 if unknown) and "url"; [] on API error
+        """
+        try:
+            # files_metadata=True is required: without it sibling sizes are None
+            repo_info = self.api.repo_info(
+                model_id, revision=revision, files_metadata=True
+            )
+            files = []
+            for sibling in repo_info.siblings or []:
+                if sibling.rfilename:  # Only include actual files
+                    files.append(
+                        {
+                            "path": sibling.rfilename,
+                            "size": getattr(sibling, "size", 0) or 0,
+                            "url": f"https://huggingface.co/{model_id}/resolve/{revision}/{sibling.rfilename}",
+                        }
+                    )
+
+            return files
+
+        except Exception as e:
+            self.logger.warning(f"Could not fetch model file list for {model_id}: {e}")
+            return []
+
+    def should_accelerate(self, model_id: str) -> bool:
+        """
+        Determine if model downloads should be accelerated.
+
+        Args:
+            model_id: HuggingFace model identifier
+
+        Returns:
+            True if hf_transfer is available and model_id matches a large-model pattern
+        """
+        # Check if hf_transfer is available
+        has_hf_transfer = (
+            self.download_accelerator.hf_transfer_downloader.hf_transfer_available
+        )
+
+        if not has_hf_transfer:
+            return False
+
+        model_lower = model_id.lower()
+        return any(pattern in model_lower for pattern in LARGE_HF_MODEL_PATTERNS)
+
+    def download_model(self, model_id: str, revision: str = "main") -> FunctionResponse:
+        """
+        Download HuggingFace model files using Tetra's custom acceleration.
+
+        This method downloads model files to the cache before transformers tries to access them,
+        using hf_transfer or custom acceleration for optimized downloads.
+
+        Args:
+            model_id: HuggingFace model identifier
+            revision: Model revision/branch
+
+        Returns:
+            FunctionResponse; success=False if the file list or any large file failed
+        """
+        if not self.should_accelerate(model_id):
+            return FunctionResponse(
+                success=True, stdout=f"Model {model_id} does not require acceleration"
+            )
+
+        self.logger.info(f"Accelerating model download: {model_id}")
+
+        # Get model file list
+        files = self.get_model_files(model_id, revision)
+        if not files:
+            return FunctionResponse(
+                success=False, error=f"Could not get file list for model {model_id}"
+            )
+
+        # Filter for main model files (ignore small config files)
+        large_files = [f for f in files if f["size"] > MB_SIZE_THRESHOLD]
+
+        if not large_files:
+            return FunctionResponse(
+                success=True, stdout=f"No large files found for model {model_id}"
+            )
+
+        self.logger.info(
+            f"Found {len(large_files)} large files to download for {model_id}"
+        )
+
+        # Create model-specific cache directory
+        model_cache_dir = self.cache_dir / "transformers" / model_id.replace("/", "--")
+        model_cache_dir.mkdir(parents=True, exist_ok=True)
+
+        successful_downloads = 0
+        total_size = sum(f["size"] for f in large_files)
+
+        for file_info in large_files:
+            file_path = model_cache_dir / file_info["path"]
+            file_path.parent.mkdir(parents=True, exist_ok=True)
+
+            # Skip if file already exists and is correct size
+            if file_path.exists() and file_path.stat().st_size == file_info["size"]:
+                self.logger.info(f"✓ {file_info['path']} (cached)")
+                successful_downloads += 1
+                continue
+
+            try:
+                file_size_mb = file_info["size"] / BYTES_PER_MB
+                self.logger.info(
+                    f"Downloading {file_info['path']} ({file_size_mb:.1f}MB)..."
+                )
+
+                # Use download accelerator
+                result = self.download_accelerator.download_with_fallback(
+                    file_info["url"],
+                    str(file_path),
+                    estimated_size_mb=file_size_mb,
+                    show_progress=True,
+                )
+
+                if result.success:
+                    successful_downloads += 1
+                    self.logger.info(f"✓ {file_info['path']} downloaded successfully")
+                else:
+                    self.logger.error(f"✗ {file_info['path']} failed: {result.error}")
+
+            except Exception as e:
+                self.logger.error(
+                    f"✗ {file_info['path']} failed with exception: {str(e)}"
+                )
+
+        success = successful_downloads == len(large_files)
+
+        if success:
+            return FunctionResponse(
+                success=True,
+                stdout=f"Successfully pre-downloaded {successful_downloads} files "
+                f"({total_size / BYTES_PER_MB:.1f}MB) for model {model_id}",
+            )
+        else:
+            return FunctionResponse(
+                success=False,
+                error=f"Failed to download {len(large_files) - successful_downloads} files for {model_id}",
+                stdout=f"Downloaded {successful_downloads}/{len(large_files)} files",
+            )
+
+    def is_model_cached(self, model_id: str, revision: str = "main") -> bool:
+        """
+        Check if model is already cached.
+
+        Args:
+            model_id: HuggingFace model identifier
+            revision: Model revision/branch (not used by this strategy's lookup)
+
+        Returns:
+            True if the model's cache directory holds any .bin or .safetensors file
+        """
+        model_cache_dir = self.cache_dir / "transformers" / model_id.replace("/", "--")
+
+        if not model_cache_dir.exists():
+            return False
+
+        # Check if there are any model files
+        model_files = list(model_cache_dir.glob("**/*.bin")) + list(
+            model_cache_dir.glob("**/*.safetensors")
+        )
+        return len(model_files) > 0
+
+    def get_cache_info(self, model_id: str) -> Dict[str, Any]:
+        """
+        Get cache information for a model.
+
+        Args:
+            model_id: HuggingFace model identifier
+
+        Returns:
+            Dict with "cached", "cache_size_mb", "file_count" (and "cache_path" if present)
+        """
+        model_cache_dir = self.cache_dir / "transformers" / model_id.replace("/", "--")
+
+        if not model_cache_dir.exists():
+            return {"cached": False, "cache_size_mb": 0, "file_count": 0}
+
+        total_size = 0
+        file_count = 0
+
+        for file_path in model_cache_dir.rglob("*"):
+            if file_path.is_file():
+                total_size += file_path.stat().st_size
+                file_count += 1
+
+        return {
+            "cached": file_count > 0,
+            "cache_size_mb": total_size / BYTES_PER_MB,
+            "file_count": file_count,
+            "cache_path": str(model_cache_dir),
+        }
+
+    def clear_model_cache(self, model_id: str) -> FunctionResponse:
+        """
+        Clear cache for a specific model.
+
+        Args:
+            model_id: HuggingFace model identifier
+
+        Returns:
+            FunctionResponse; success=True if removed or if nothing was cached
+        """
+        model_cache_dir = self.cache_dir / "transformers" / model_id.replace("/", "--")
+
+        if not model_cache_dir.exists():
+            return FunctionResponse(
+                success=True, stdout=f"No cache found for model {model_id}"
+            )
+
+        try:
+            import shutil
+
+            shutil.rmtree(model_cache_dir)
+
+            return FunctionResponse(
+                success=True, stdout=f"Cleared cache for model {model_id}"
+            )
+        except Exception as e:
+            return FunctionResponse(
+                success=False, error=f"Failed to clear cache for {model_id}: {str(e)}"
+            )
diff --git a/src/hf_strategy_factory.py b/src/hf_strategy_factory.py
new file mode 100644
index 0000000..1ce81de
--- /dev/null
+++ b/src/hf_strategy_factory.py
@@ -0,0 +1,119 @@
+"""
+HuggingFace download strategy factory.
+
+Provides configuration system for switching between different HF download strategies
+and creating the appropriate downloader instance based on environment variables.
+"""
+
+import os
+import logging
+from typing import Optional, Dict, Any
+
+from hf_download_strategy import HFDownloadStrategy
+from hf_downloader_tetra import TetraHFDownloader
+from hf_downloader_native import NativeHFDownloader
+
+
+class HFStrategyFactory:
+    """Factory for creating HF download strategy instances."""
+
+    # Environment variable name
+    STRATEGY_ENV_VAR = "HF_DOWNLOAD_STRATEGY"
+
+    # Available strategy names
+    TETRA_STRATEGY = "tetra"
+    NATIVE_STRATEGY = "native"
+
+    # Strategy used when the environment variable is unset or unrecognized
+    DEFAULT_STRATEGY = TETRA_STRATEGY
+
+    @classmethod
+    def get_available_strategies(cls) -> list[str]:
+        """Get list of available strategy names."""
+        return [cls.TETRA_STRATEGY, cls.NATIVE_STRATEGY]
+
+    @classmethod
+    def get_configured_strategy(cls) -> str:
+        """
+        Read the strategy name from HF_DOWNLOAD_STRATEGY (case-insensitive).
+
+        Returns:
+            Strategy name; DEFAULT_STRATEGY ("tetra") if unset or unrecognized
+        """
+        strategy = os.environ.get(cls.STRATEGY_ENV_VAR, cls.DEFAULT_STRATEGY).lower()
+
+        # Validate strategy
+        if strategy not in cls.get_available_strategies():
+            logger = logging.getLogger(__name__)
+            logger.warning(
+                f"Unknown HF download strategy '{strategy}', falling back to '{cls.DEFAULT_STRATEGY}'"
+            )
+            return cls.DEFAULT_STRATEGY
+
+        return strategy
+
+    @classmethod
+    def create_strategy(
+        cls, workspace_manager, strategy: Optional[str] = None
+    ) -> HFDownloadStrategy:
+        """
+        Create HF download strategy instance.
+
+        Args:
+            workspace_manager: Workspace manager instance
+            strategy: Optional strategy override (defaults to environment configuration)
+
+        Returns:
+            HFDownloadStrategy instance (NativeHFDownloader for unrecognized overrides)
+        """
+        if strategy is None:
+            strategy = cls.get_configured_strategy()
+
+        logger = logging.getLogger(__name__)
+        logger.info(f"Creating HF download strategy: {strategy}")
+
+        if strategy == cls.TETRA_STRATEGY:
+            return TetraHFDownloader(workspace_manager)
+        elif strategy == cls.NATIVE_STRATEGY:
+            return NativeHFDownloader(workspace_manager)
+        else:
+            # Unrecognized override: falls back to native, not to DEFAULT_STRATEGY
+            logger.warning(f"Unknown strategy '{strategy}', using native")
+            return NativeHFDownloader(workspace_manager)
+
+    @classmethod
+    def set_strategy(cls, strategy: str) -> None:
+        """
+        Set the HF download strategy via environment variable.
+
+        Args:
+            strategy: Strategy name to set; ValueError if not an available strategy
+        """
+        if strategy not in cls.get_available_strategies():
+            raise ValueError(
+                f"Invalid strategy '{strategy}'. Available: {cls.get_available_strategies()}"
+            )
+
+        os.environ[cls.STRATEGY_ENV_VAR] = strategy
+
+        logger = logging.getLogger(__name__)
+        logger.info(f"Set HF download strategy to: {strategy}")
+
+    @classmethod
+    def get_strategy_info(cls) -> Dict[str, Any]:
+        """
+        Get information about the current strategy configuration.
+
+        Returns:
+            Dict with the resolved strategy, the raw env value, default and choices
+        """
+        current_strategy = cls.get_configured_strategy()
+        env_value = os.environ.get(cls.STRATEGY_ENV_VAR, "not set")
+
+        return {
+            "current_strategy": current_strategy,
+            "environment_variable": cls.STRATEGY_ENV_VAR,
+            "environment_value": env_value,
+            "default_strategy": cls.DEFAULT_STRATEGY,
+            "available_strategies": cls.get_available_strategies(),
+        }
diff --git a/src/huggingface_accelerator.py b/src/huggingface_accelerator.py
new file mode 100644
index 0000000..2f2b2ad
--- /dev/null
+++ b/src/huggingface_accelerator.py
@@ -0,0 +1,150 @@
+"""
+HuggingFace model download acceleration.
+
+This module provides accelerated downloads for HuggingFace models and datasets,
+integrating with the existing volume workspace caching system using pluggable
+download strategies.
+"""
+
+import logging
+from typing import Dict, List, Any
+
+from huggingface_hub import HfApi
+from remote_execution import FunctionResponse
+from hf_strategy_factory import HFStrategyFactory
+from hf_download_strategy import HFDownloadStrategy
+
+
+class HuggingFaceAccelerator:
+    """Accelerated downloads for HuggingFace models and files using pluggable strategies."""
+
+    def __init__(self, workspace_manager):
+        self.workspace_manager = workspace_manager
+        self.logger = logging.getLogger(__name__)
+        self.api = HfApi()
+
+        # Create the configured download strategy
+        self.strategy: HFDownloadStrategy = HFStrategyFactory.create_strategy(
+            workspace_manager
+        )
+
+    def get_model_files(
+        self, model_id: str, revision: str = "main"
+    ) -> List[Dict[str, Any]]:
+        """
+        Get list of files for a HuggingFace model using the HF Hub API.
+
+        Args:
+            model_id: HuggingFace model identifier (e.g., 'gpt2', 'microsoft/DialoGPT-medium')
+            revision: Model revision/branch (default: 'main')
+
+        Returns:
+            List of dicts with "path", "size" (0 if unknown) and "url"; [] on API error
+        """
+        try:
+            # files_metadata=True is required: without it sibling sizes are None
+            repo_info = self.api.repo_info(
+                model_id, revision=revision, files_metadata=True
+            )
+            files = []
+            for sibling in repo_info.siblings or []:
+                if sibling.rfilename:  # Only include actual files
+                    files.append(
+                        {
+                            "path": sibling.rfilename,
+                            "size": getattr(sibling, "size", 0) or 0,
+                            "url": f"https://huggingface.co/{model_id}/resolve/{revision}/{sibling.rfilename}",
+                        }
+                    )
+
+            return files
+
+        except Exception as e:
+            self.logger.warning(f"Could not fetch model file list for {model_id}: {e}")
+            return []
+
+    def should_accelerate_model(self, model_id: str) -> bool:
+        """
+        Determine if model should be pre-cached using the configured strategy.
+
+        Args:
+            model_id: HuggingFace model identifier
+
+        Returns:
+            True if model should be pre-cached
+        """
+        return self.strategy.should_accelerate(model_id)
+
+    def accelerate_model_download(
+        self, model_id: str, revision: str = "main"
+    ) -> FunctionResponse:
+        """
+        Pre-download HuggingFace model using the configured download strategy.
+
+        Args:
+            model_id: HuggingFace model identifier
+            revision: Model revision/branch
+
+        Returns:
+            FunctionResponse with download results
+        """
+        return self.strategy.download_model(model_id, revision)
+
+    def is_model_cached(self, model_id: str, revision: str = "main") -> bool:
+        """
+        Check if model is already cached using the configured strategy.
+
+        Args:
+            model_id: HuggingFace model identifier
+            revision: Model revision/branch
+
+        Returns:
+            True if model appears to be cached
+        """
+        return self.strategy.is_model_cached(model_id, revision)
+
+    def get_cache_info(self, model_id: str) -> Dict[str, Any]:
+        """
+        Get cache information for a model using the configured strategy.
+
+        Args:
+            model_id: HuggingFace model identifier
+
+        Returns:
+            Dictionary with cache information
+        """
+        return self.strategy.get_cache_info(model_id)
+
+    def clear_model_cache(self, model_id: str) -> FunctionResponse:
+        """
+        Clear cache for a specific model using the configured strategy.
+
+        Args:
+            model_id: HuggingFace model identifier
+
+        Returns:
+            FunctionResponse with clearing result
+        """
+        return self.strategy.clear_model_cache(model_id)
+
+    def get_strategy_info(self) -> Dict[str, Any]:
+        """
+        Get information about the current download strategy.
+
+        Returns:
+            Factory strategy info plus "strategy_instance" (active strategy class name)
+        """
+        strategy_info = HFStrategyFactory.get_strategy_info()
+        strategy_info["strategy_instance"] = type(self.strategy).__name__
+        return strategy_info
+
+    def set_strategy(self, strategy: str) -> None:
+        """
+        Change the download strategy (creates new strategy instance).
+
+        Args:
+            strategy: Strategy name ("tetra" or "native"); also updates the env var
+        """
+        HFStrategyFactory.set_strategy(strategy)
+        self.strategy = HFStrategyFactory.create_strategy(self.workspace_manager)
+        self.logger.info(f"Switched to {strategy} download strategy")
diff --git a/src/remote_executor.py b/src/remote_executor.py
index 0e1ac90..043aba0 100644
--- a/src/remote_executor.py
+++ b/src/remote_executor.py
@@ -1,4 +1,6 @@
 import logging
+import asyncio
+from typing import List, Any
 from remote_execution import FunctionRequest, FunctionResponse, RemoteExecutorStub
 from workspace_manager import WorkspaceManager
 from dependency_installer import DependencyInstaller
@@ -40,27 +42,249 @@ async def ExecuteFunction(self, request: FunctionRequest) -> FunctionResponse:
             if workspace_init.stdout:
                 self.logger.info(workspace_init.stdout)
 
+        # Install dependencies and cache models
+        if request.accelerate_downloads:
+            # Run installations in parallel when acceleration is enabled
+            dep_result = await self._install_dependencies_parallel(request)
+            if not dep_result.success:
+                return dep_result
+        else:
+            # Sequential installation when acceleration is disabled
+            dep_result = await self._install_dependencies_sequential(request)
+            if not dep_result.success:
+                return dep_result
+
+        # Route to appropriate execution method based on type
+        execution_type = getattr(request, "execution_type", "function")
+
+        # Execute the function/class
+        if execution_type == "class":
+            result = self.class_executor.execute_class_method(request)
+        else:
+            result = self.function_executor.execute(request)
+
+        # Add acceleration summary to the result
+        self._log_acceleration_summary(request, result)
+
+        return result
+
+    def _log_acceleration_summary(
+        self, request: FunctionRequest, result: FunctionResponse
+    ):
+        """
+        Log a DEBUG-level summary of how download acceleration applied to a request.
+
+        Args:
+            request: FunctionRequest whose acceleration settings are summarized
+            result: FunctionResponse from execution (not read by this method)
+        """
+        if not hasattr(self.dependency_installer, "download_accelerator"):
+            return
+
+        installer = self.dependency_installer
+        hf_transfer_available = (
+            installer.download_accelerator.hf_transfer_downloader.hf_transfer_available
+        )
+        nala_available = installer._check_nala_available()
+        summary_parts = []
+
+        if not request.accelerate_downloads:
+            summary_parts.append("- Download acceleration DISABLED")
+            summary_parts.append("→ Using standard downloads")
+        elif not (hf_transfer_available or nala_available):
+            # Must precede the ENABLED branch, which would otherwise shadow it
+            summary_parts.append(
+                "⚠ Download acceleration REQUESTED but no accelerators available"
+            )
+            summary_parts.append("→ Using standard downloads")
+        else:
+            summary_parts.append("✓ Download acceleration ENABLED")
+            if self.workspace_manager.has_runpod_volume:
+                summary_parts.append(
+                    f"✓ Volume workspace: {self.workspace_manager.workspace_path}"
+                )
+                summary_parts.append("✓ Persistent caching enabled")
+            else:
+                summary_parts.append("ℹ No persistent volume - using temporary cache")
+
+            # System package acceleration status
+            if request.system_dependencies:
+                large_system_packages = installer._identify_large_system_packages(
+                    request.system_dependencies
+                )
+                if large_system_packages and nala_available:
+                    summary_parts.append(
+                        f"✓ System packages with nala: {len(large_system_packages)}"
+                    )
+                else:
+                    summary_parts.append("→ System packages using standard apt-get")
+
+            if request.hf_models_to_cache:
+                summary_parts.append(
+                    f"✓ HF models pre-cached: {len(request.hf_models_to_cache)}"
+                )
+
+        # Every branch above appends at least one line, so the summary is never empty
+        self.logger.debug("=== DOWNLOAD ACCELERATION SUMMARY ===")
+        for part in summary_parts:
+            self.logger.debug(part)
+        self.logger.debug("=====================================")
+
+    async def _install_dependencies_parallel(
+        self, request: FunctionRequest
+    ) -> FunctionResponse:
+        """
+        Concurrently install dependencies and pre-cache HuggingFace models.
+
+        One awaitable is scheduled per requested category (system packages,
+        Python packages, and each HF model), then all are awaited together.
+
+        Args:
+            request: FunctionRequest describing what to install and cache
+
+        Returns:
+            FunctionResponse aggregated from every scheduled task
+        """
+        # (label, awaitable) pairs; the label identifies each task in logs/results
+        scheduled = []
+
+        if request.system_dependencies:
+            system_job = self.dependency_installer.install_system_dependencies_async(
+                request.system_dependencies, request.accelerate_downloads
+            )
+            scheduled.append(("system_dependencies", system_job))
+
+        if request.dependencies:
+            python_job = self.dependency_installer.install_dependencies_async(
+                request.dependencies, request.accelerate_downloads
+            )
+            scheduled.append(("python_dependencies", python_job))
+
+        # One caching job per requested HuggingFace model
+        for model_id in request.hf_models_to_cache or ():
+            model_job = self.workspace_manager.accelerate_model_download_async(model_id)
+            scheduled.append((f"hf_model_{model_id}", model_job))
+
+        if not scheduled:
+            return FunctionResponse(success=True, stdout="No dependencies to install")
+
+        task_names = [label for label, _ in scheduled]
+        self.logger.info(
+            f"Starting parallel installation of {len(scheduled)} tasks: {task_names}"
+        )
+
+        # return_exceptions=True delivers raised exceptions as entries in results
+        results = await asyncio.gather(
+            *(job for _, job in scheduled), return_exceptions=True
+        )
+
+        # Collapse the per-task outcomes into a single response
+        return self._process_parallel_results(results, task_names)
+
+    async def _install_dependencies_sequential(
+        self, request: FunctionRequest
+    ) -> FunctionResponse:
+        """
+        Install system packages, pre-cache HF models, then Python packages, in that order.
+
+        Args:
+            request: FunctionRequest with dependencies to install
+
+        Returns:
+            The first failed install's FunctionResponse, else a success FunctionResponse
+        """
         # Install system dependencies first
         if request.system_dependencies:
             sys_installed = self.dependency_installer.install_system_dependencies(
-                request.system_dependencies
+                request.system_dependencies, request.accelerate_downloads
             )
             if not sys_installed.success:
                 return sys_installed
             self.logger.info(sys_installed.stdout)
 
+        # Pre-cache HF models only when acceleration is requested; cache failures are logged, not fatal
+        if request.accelerate_downloads and request.hf_models_to_cache:
+            for model_id in request.hf_models_to_cache:
+                self.logger.info(f"Pre-caching HuggingFace model: {model_id}")
+                cache_result = self.workspace_manager.accelerate_model_download(
+                    model_id
+                )
+                if cache_result.success:
+                    self.logger.info(
+                        f"Successfully cached model {model_id}: {cache_result.stdout}"
+                    )
+                else:
+                    self.logger.warning(
+                        f"Failed to cache model {model_id}: {cache_result.error}"
+                    )
+
         # Install Python dependencies next
         if request.dependencies:
             py_installed = self.dependency_installer.install_dependencies(
-                request.dependencies
+                request.dependencies, request.accelerate_downloads
             )
             if not py_installed.success:
                 return py_installed
             self.logger.info(py_installed.stdout)
 
-        # Route to appropriate execution method based on type
-        execution_type = getattr(request, "execution_type", "function")
-        if execution_type == "class":
-            return self.class_executor.execute_class_method(request)
+        return FunctionResponse(
+            success=True, stdout="Dependencies installed successfully"
+        )
+
+    def _process_parallel_results(
+        self, results: List[Any], task_names: List[str]
+    ) -> FunctionResponse:
+        """
+        Aggregate asyncio.gather() results into one FunctionResponse.
+
+        Args:
+            results: Task results; entries may be raised exceptions, not responses
+            task_names: Names matching results by index
+
+        Returns:
+            Success only if every task succeeded; otherwise the joined failure messages
+        """
+        success_count = 0
+        failures = []
+        stdout_parts = []
+
+        for i, result in enumerate(results):
+            task_name = task_names[i]
+
+            if isinstance(result, BaseException):
+                # BaseException, not Exception: gather() may hand back CancelledError
+                error_msg = f"{task_name}: Exception - {str(result) or type(result).__name__}"
+                failures.append(error_msg)
+                self.logger.error(error_msg)
+            elif isinstance(result, FunctionResponse):
+                if result.success:
+                    success_count += 1
+                    stdout_parts.append(f"✓ {task_name}: {result.stdout}")
+                    self.logger.info(f"✓ {task_name} completed successfully")
+                else:
+                    error_msg = f"{task_name}: {result.error}"
+                    failures.append(error_msg)
+                    self.logger.error(f"✗ {task_name} failed: {result.error}")
+            else:
+                # Unexpected result type
+                error_msg = f"{task_name}: Unexpected result type - {type(result)}"
+                failures.append(error_msg)
+                self.logger.error(error_msg)
+
+        # Determine overall success
+        if failures:
+            # Some tasks failed
+            error_summary = f"Failed tasks: {'; '.join(failures)}"
+            return FunctionResponse(
+                success=False,
+                error=error_summary,
+                stdout=f"Parallel installation: {success_count}/{len(results)} tasks succeeded\n"
+                + "\n".join(stdout_parts),
+            )
         else:
-            return self.function_executor.execute(request)
+            # All tasks succeeded
+            return FunctionResponse(
+                success=True,
+                stdout=f"Parallel installation: {success_count}/{len(results)} tasks completed successfully\n"
+                + "\n".join(stdout_parts),
+            )
diff --git a/test-handler.sh b/src/test-handler.sh
similarity index 100%
rename from test-handler.sh
rename to src/test-handler.sh
diff --git a/src/test_class_custom_method.json b/src/test_class_custom_method.json
new file mode 100644
index 0000000..6dc55b3
--- /dev/null
+++ b/src/test_class_custom_method.json
@@ -0,0 +1,13 @@
+{
+  "input": {
+    "execution_type": "class",
+    "class_name": "Calculator",
+    "class_code": "class Calculator:\n    def __init__(self, initial_value=0):\n        self.value = initial_value\n        self.operation_history = []\n    \n    def add(self, operand):\n        old_value = self.value\n        self.value += operand\n        self.operation_history.append(f'{old_value} + {operand} = {self.value}')\n        return self.value\n    \n    def multiply(self, operand):\n        old_value = self.value\n        self.value *= operand\n        self.operation_history.append(f'{old_value} * {operand} = {self.value}')\n        return self.value\n    \n    def get_history(self):\n        return {\n            'current_value': self.value,\n            'operations': self.operation_history,\n            'operation_count': len(self.operation_history)\n        }\n    \n    def reset(self, new_value=0):\n        old_value = self.value\n        self.value = new_value\n        self.operation_history.append(f'Reset from {old_value} to {new_value}')\n        return self.value",
+    "method_name": "multiply",
+    "constructor_args": ["gAWVCgAAAAAAAABHQCQAAAAAAAAu"],
+    "constructor_kwargs": {},
+    "args": ["gAWVCgAAAAAAAABHQBQAAAAAAAAu"],
+    "kwargs": {},
+    "create_new_instance": true
+  }
+}
\ No newline at end of file
diff --git a/test_class_input.json b/src/test_class_input.json
similarity index 100%
rename from test_class_input.json
rename to src/test_class_input.json
diff --git a/src/test_class_persistence.json b/src/test_class_persistence.json
new file mode 100644
index 0000000..021907c
--- /dev/null
+++ b/src/test_class_persistence.json
@@ -0,0 +1,12 @@
+{
+  "input": {
+    "execution_type": "class",
+    "class_name": "PersistentCounter",
+    "class_code": "class PersistentCounter:\n    def __init__(self, initial_value=0):\n        self.value = initial_value\n        self.call_history = []\n    \n    def increment(self, amount=1):\n        self.value += amount\n        self.call_history.append(f'incremented by {amount}')\n        return self.value\n    \n    def get_state(self):\n        return {\n            'current_value': self.value,\n            'call_count': len(self.call_history),\n            'call_history': self.call_history\n        }",
+    "method_name": "get_state",
+    "constructor_args": ["gAWVCQAAAAAAAACMATWULg=="],
+    "constructor_kwargs": {},
+    "args": [],
+    "kwargs": {},
+    "instance_id": "test_persistent_counter_001",
+    "create_new_instance": true } }
\ No newline at end of file
diff --git a/src/test_error_scenarios.json b/src/test_error_scenarios.json
new file mode 100644
index 0000000..c45c3db
--- /dev/null
+++ b/src/test_error_scenarios.json
@@ -0,0 +1,5 @@
+{
+  "input": {
+    "function_name": "test_error_handling",
+    "function_code": "def test_error_handling():\n    import sys\n    import traceback\n    \n    # This function tests that the handler can gracefully handle errors\n    # and return proper error information to the client\n    \n    results = {\n        'controlled_errors': {},\n        'environment_checks': {},\n        'error_handling_test': 'completed'\n    }\n    \n    # Test 1: Controlled exception that should be caught\n    try:\n        # This will raise a ZeroDivisionError\n        result = 10 / 0\n        results['controlled_errors']['division_by_zero'] = 'unexpected_success'\n    except ZeroDivisionError as e:\n        results['controlled_errors']['division_by_zero'] = {\n            'error_type': str(type(e).__name__),\n            'error_message': str(e),\n            'handled_correctly': True\n        }\n    \n    # Test 2: Import error for non-existent module\n    try:\n        import non_existent_module_xyz123\n        results['controlled_errors']['import_error'] = 'unexpected_success'\n    except ImportError as e:\n        results['controlled_errors']['import_error'] = {\n            'error_type': str(type(e).__name__),\n            'error_message': str(e),\n            'handled_correctly': True\n        }\n    \n    # Test 3: Test that bad dependencies would fail (but we won't actually use bad deps)\n    # This test verifies the function can run with intentionally missing deps\n    try:\n        # Try to import a package that should exist (this shouldn't fail)\n        import json\n        results['controlled_errors']['json_import'] = {\n            'imported_successfully': True,\n            'has_dumps_method': hasattr(json, 'dumps')\n        }\n    except ImportError as e:\n        results['controlled_errors']['json_import'] = {\n            'imported_successfully': False,\n            'error': str(e)\n        }\n    \n    # Environment checks\n    results['environment_checks'] = {\n        'python_version': sys.version,\n        'platform': 
sys.platform,\n        'executable': sys.executable\n    }\n    \n    return results\n",
+    "dependencies": ["nonexistent-package-xyz123"], "args": [], "kwargs": {} } }
\ No newline at end of file
diff --git a/src/test_function_args.json b/src/test_function_args.json
new file mode 100644
index 0000000..ca84a6d
--- /dev/null
+++ b/src/test_function_args.json
@@ -0,0 +1,6 @@
+{
+  "input": {
+    "function_name": "test_function_with_arguments",
+    "function_code": "def test_function_with_arguments(number, text, data_list=None, multiplier=2):\n    import json\n    \n    # Validate the arguments were passed correctly\n    result = {\n        'received_args': {\n            'number': number,\n            'text': text,\n            'data_list': data_list,\n            'multiplier': multiplier\n        },\n        'processed_results': {\n            'number_times_multiplier': number * multiplier,\n            'text_upper': text.upper(),\n            'list_sum': sum(data_list) if data_list else 0,\n            'list_length': len(data_list) if data_list else 0\n        },\n        'argument_types': {\n            'number_type': str(type(number)),\n            'text_type': str(type(text)),\n            'data_list_type': str(type(data_list)),\n            'multiplier_type': str(type(multiplier))\n        }\n    }\n    \n    return result\n",
+    "args": ["gAVLKi4=", "gAWVDwAAAAAAAACMC2hlbGxvIHdvcmxklC4="],
+    "kwargs": {"data_list": "gAWVDwAAAAAAAABdlChLAUsCSwNLBEsFZS4=", "multiplier": "gAVLAy4="} } }
\ No newline at end of file
diff --git a/src/test_hf_accelerated_input.json b/src/test_hf_accelerated_input.json
new file mode 100644
index 0000000..7665a0e
--- /dev/null
+++ b/src/test_hf_accelerated_input.json
@@ -0,0 +1,11 @@
+{
+  "input": {
+    "function_name": "test_hf_acceleration_with_volume",
+    "function_code": "def test_hf_acceleration_with_volume():\n    import os\n    import time\n    from transformers import AutoTokenizer\n    \n    start_time = time.time()\n    \n    # Test HF model download with acceleration enabled\n    model_name = 'gpt2'\n    print(f'Testing accelerated HF model download: {model_name}')\n    \n    tokenizer = AutoTokenizer.from_pretrained(model_name)\n    \n    download_time = time.time() - start_time\n    \n    # Check cache paths\n    cache_info = {\n        'hf_home': os.environ.get('HF_HOME'),\n        'transformers_cache': os.environ.get('TRANSFORMERS_CACHE'),\n        'virtual_env': os.environ.get('VIRTUAL_ENV'),\n        'download_time': round(download_time, 2)\n    }\n    \n    print(f'Download completed in {download_time:.2f}s')\n    print(f'Cache paths: {cache_info}')\n    \n    return {\n        'model_name': model_name,\n        'vocab_size': tokenizer.vocab_size,\n        'cache_info': cache_info,\n        'acceleration_enabled': True,\n        'test_completed': True\n    }\n",
+    "dependencies": ["transformers", "torch"],
+    "accelerate_downloads": true,
+    "hf_models_to_cache": ["gpt2"],
+    "args": [],
+    "kwargs": {}
+  }
+}
\ No newline at end of file
diff --git a/src/test_hf_no_volume.json b/src/test_hf_no_volume.json
new file mode 100644
index 0000000..f72818d
--- /dev/null
+++ b/src/test_hf_no_volume.json
@@ -0,0 +1,11 @@
+{
+  "input": {
+    "function_name": "test_hf_acceleration_no_volume",
+    "function_code": "def test_hf_acceleration_no_volume():\n    import os\n    import time\n    from transformers import AutoTokenizer\n    \n    # Test that HF acceleration works without a RunPod volume\n    # This was the main fix - acceleration should work regardless of volume presence\n    \n    start_time = time.time()\n    \n    model_name = 'gpt2'\n    print(f'Testing HF acceleration without volume: {model_name}')\n    \n    tokenizer = AutoTokenizer.from_pretrained(model_name)\n    \n    download_time = time.time() - start_time\n    \n    # Verify environment shows no volume but acceleration works\n    env_info = {\n        'hf_home': os.environ.get('HF_HOME'),\n        'transformers_cache': os.environ.get('TRANSFORMERS_CACHE'),\n        'virtual_env': os.environ.get('VIRTUAL_ENV'),\n        'has_runpod_volume': '/runpod-volume' in str(os.environ.get('VIRTUAL_ENV', '')),\n        'download_time': round(download_time, 2)\n    }\n    \n    print(f'Download completed in {download_time:.2f}s without volume')\n    print(f'Environment: {env_info}')\n    \n    return {\n        'model_name': model_name,\n        'vocab_size': tokenizer.vocab_size,\n        'environment': env_info,\n        'acceleration_without_volume': True,\n        'test_completed': True\n    }\n",
+    "dependencies": ["transformers", "torch"],
+    "accelerate_downloads": true,
+    "hf_models_to_cache": ["gpt2"],
+    "args": [],
+    "kwargs": {}
+  }
+}
\ No newline at end of file
diff --git a/test_input.json b/src/test_input.json
similarity index 100%
rename from test_input.json
rename to src/test_input.json
diff --git a/src/test_mixed_dependencies.json b/src/test_mixed_dependencies.json
new file mode 100644
index 0000000..9057599
--- /dev/null
+++ b/src/test_mixed_dependencies.json
@@ -0,0 +1,10 @@
+{
+  "input": {
+    "function_name": "test_mixed_dependencies",
+    "function_code": "def test_mixed_dependencies():\n    import subprocess\n    import json\n    import os\n    \n    # Test that both system and Python dependencies are available\n    results = {\n        'system_dependencies': {},\n        'python_dependencies': {},\n        'environment_info': {}\n    }\n    \n    # Test system dependency (wget)\n    try:\n        wget_result = subprocess.run(['wget', '--version'], \n                                   capture_output=True, text=True, timeout=10)\n        results['system_dependencies']['wget'] = {\n            'available': wget_result.returncode == 0,\n            'version': wget_result.stdout.split('\\n')[0] if wget_result.returncode == 0 else None,\n            'error': wget_result.stderr if wget_result.returncode != 0 else None\n        }\n    except Exception as e:\n        results['system_dependencies']['wget'] = {\n            'available': False,\n            'error': str(e)\n        }\n    \n    # Test Python dependencies\n    try:\n        import requests\n        results['python_dependencies']['requests'] = {\n            'available': True,\n            'version': requests.__version__,\n            'location': requests.__file__\n        }\n    except ImportError as e:\n        results['python_dependencies']['requests'] = {\n            'available': False,\n            'error': str(e)\n        }\n    \n    try:\n        import numpy\n        results['python_dependencies']['numpy'] = {\n            'available': True,\n            'version': numpy.__version__,\n            'location': numpy.__file__\n        }\n        # Test numpy functionality\n        arr = numpy.array([1, 2, 3, 4, 5])\n        results['python_dependencies']['numpy']['test_result'] = {\n            'array_sum': int(arr.sum()),\n            'array_mean': float(arr.mean())\n        }\n    except ImportError as e:\n        results['python_dependencies']['numpy'] = {\n            'available': False,\n            'error': str(e)\n        }\n 
   \n    # Environment info\n    results['environment_info'] = {\n        'running_as_root': os.getuid() == 0 if hasattr(os, 'getuid') else False,\n        'virtual_env': os.environ.get('VIRTUAL_ENV'),\n        'python_path': os.environ.get('PYTHONPATH')\n    }\n    \n    return results\n",
+    "dependencies": ["requests", "numpy"],
+    "system_dependencies": ["wget"],
+    "args": [],
+    "kwargs": {}
+  }
+}
\ No newline at end of file
diff --git a/src/test_system_dependencies.json b/src/test_system_dependencies.json
new file mode 100644
index 0000000..12ee909
--- /dev/null
+++ b/src/test_system_dependencies.json
@@ -0,0 +1,9 @@
+{
+  "input": {
+    "function_name": "test_system_dependencies",
+    "function_code": "def test_system_dependencies():\n    import subprocess\n    import os\n    \n    # Test that system packages were installed successfully\n    # We'll test with curl which is commonly available or gets installed\n    \n    result = {}\n    \n    # Test if curl command is available\n    try:\n        curl_result = subprocess.run(['curl', '--version'], \n                                   capture_output=True, text=True, timeout=10)\n        if curl_result.returncode == 0:\n            result['curl_available'] = True\n            result['curl_version'] = curl_result.stdout.split('\\n')[0]\n        else:\n            result['curl_available'] = False\n            result['curl_error'] = curl_result.stderr\n    except Exception as e:\n        result['curl_available'] = False\n        result['curl_error'] = str(e)\n    \n    # Test if git command is available (should be pre-installed in most containers)\n    try:\n        git_result = subprocess.run(['git', '--version'],\n                                  capture_output=True, text=True, timeout=10)\n        if git_result.returncode == 0:\n            result['git_available'] = True\n            result['git_version'] = git_result.stdout.strip()\n        else:\n            result['git_available'] = False\n            result['git_error'] = git_result.stderr\n    except Exception as e:\n        result['git_available'] = False\n        result['git_error'] = str(e)\n    \n    # Check if we're running as root (needed for apt install)\n    result['running_as_root'] = os.getuid() == 0 if hasattr(os, 'getuid') else False\n    result['environment_check'] = 'system_deps_test_completed'\n    \n    return result\n",
+    "system_dependencies": ["curl"],
+    "args": [],
+    "kwargs": {}
+  }
+}
\ No newline at end of file
diff --git a/src/test_uv_no_acceleration.json b/src/test_uv_no_acceleration.json
new file mode 100644
index 0000000..a3099e3
--- /dev/null
+++ b/src/test_uv_no_acceleration.json
@@ -0,0 +1,10 @@
+{
+  "input": {
+    "function_name": "test_uv_installation_without_acceleration",
+    "function_code": "def test_uv_installation_without_acceleration():\n    import json\n    import sys\n    \n    # Test that packages installed with UV (accelerate_downloads=False) are available\n    try:\n        import requests\n        import transformers\n        \n        # Get package locations to verify they're in the right place\n        requests_location = requests.__file__\n        transformers_location = transformers.__file__\n        \n        # Check if we're using the virtual environment\n        venv_active = hasattr(sys, 'real_prefix') or (hasattr(sys, 'base_prefix') and sys.base_prefix != sys.prefix)\n        \n        return {\n            'success': True,\n            'message': 'Both requests and transformers imported successfully with UV (no acceleration)',\n            'requests_location': requests_location,\n            'transformers_location': transformers_location,\n            'virtual_env_active': venv_active,\n            'python_prefix': sys.prefix\n        }\n    except ImportError as e:\n        return {\n            'success': False,\n            'error': f'Failed to import packages: {str(e)}',\n            'python_prefix': sys.prefix,\n            'virtual_env_active': hasattr(sys, 'real_prefix') or (hasattr(sys, 'base_prefix') and sys.base_prefix != sys.prefix)\n        }\n",
+    "dependencies": ["requests", "transformers"],
+    "accelerate_downloads": false,
+    "args": [],
+    "kwargs": {}
+  }
+}
\ No newline at end of file
diff --git a/src/workspace_manager.py b/src/workspace_manager.py
index 38f1982..1276a00 100644
--- a/src/workspace_manager.py
+++ b/src/workspace_manager.py
@@ -3,7 +3,11 @@
 import fcntl
 import time
 import logging
-from typing import Optional
+import asyncio
+from typing import Optional, TYPE_CHECKING, Any, Dict
+
+if TYPE_CHECKING:
+    from huggingface_accelerator import HuggingFaceAccelerator
 
 from remote_execution import FunctionResponse
 from constants import (
@@ -46,6 +50,9 @@ def __init__(self) -> None:
             self.cache_path = None
             self.hf_cache_path = None
 
+        # Initialize HuggingFace accelerator after paths are set
+        self._hf_accelerator: Optional[HuggingFaceAccelerator] = None
+
         if self.has_runpod_volume:
             self._configure_uv_cache()
             self._configure_huggingface_cache()
@@ -62,19 +69,14 @@ def _configure_huggingface_cache(self):
             # Ensure HF cache directory exists
             os.makedirs(self.hf_cache_path, exist_ok=True)
 
-            # Set main HF cache directory
+            # Set main HF cache directory - HF will automatically create subdirectories
             os.environ["HF_HOME"] = self.hf_cache_path
 
-            # Set specific cache paths for different HF components
-            os.environ["TRANSFORMERS_CACHE"] = os.path.join(
-                self.hf_cache_path, "transformers"
-            )
-            os.environ["HF_DATASETS_CACHE"] = os.path.join(
-                self.hf_cache_path, "datasets"
-            )
-            os.environ["HUGGINGFACE_HUB_CACHE"] = os.path.join(
-                self.hf_cache_path, "hub"
-            )
+            # HF automatically creates and manages these subdirectories:
+            # - hub/ (for model downloads and cache)
+            # - transformers/ (legacy, but still used by some components)
+            # - datasets/ (for HF datasets)
+            # Let HF handle the hierarchy instead of forcing specific paths
 
     def _configure_volume_environment(self):
         """Configure environment variables for volume usage."""
@@ -371,3 +373,69 @@ def _remove_broken_virtual_environment(self):
                 self.logger.error(
                     f"Error removing broken virtual environment: {str(e)}"
                 )
+
+    @property
+    def hf_accelerator(self) -> "HuggingFaceAccelerator":
+        """Return the cached HuggingFaceAccelerator, building it on first access."""
+        if self._hf_accelerator is not None:
+            return self._hf_accelerator
+        # Deferred import: at module level this name exists only under TYPE_CHECKING
+        from huggingface_accelerator import HuggingFaceAccelerator
+        self._hf_accelerator = HuggingFaceAccelerator(self)
+        return self._hf_accelerator
+
+    def accelerate_model_download(
+        self, model_id: str, revision: str = "main"
+    ) -> FunctionResponse:
+        """
+        Pre-download a HuggingFace model through the lazily created accelerator.
+
+        Args:
+            model_id: HuggingFace model identifier
+            revision: Model revision/branch
+
+        Returns:
+            FunctionResponse returned by the accelerator's accelerate_model_download()
+        """
+        return self.hf_accelerator.accelerate_model_download(model_id, revision)
+
+    async def accelerate_model_download_async(
+        self, model_id: str, revision: str = "main"
+    ) -> FunctionResponse:
+        """
+        Run accelerate_model_download() in a worker thread so it can be awaited.
+
+        Args:
+            model_id: HuggingFace model identifier
+            revision: Model revision/branch
+
+        Returns:
+            FunctionResponse from the synchronous accelerate_model_download() call
+        """
+        return await asyncio.to_thread(
+            self.accelerate_model_download, model_id, revision
+        )
+
+    def is_model_cached(self, model_id: str, revision: str = "main") -> bool:
+        """
+        Ask the HuggingFace accelerator whether a model is already cached.
+
+        Args:
+            model_id: HuggingFace model identifier
+            revision: Model revision/branch
+
+        Returns:
+            The accelerator's is_model_cached() answer for this model and revision
+        """
+        return self.hf_accelerator.is_model_cached(model_id, revision)
+
+    def get_model_cache_info(self, model_id: str) -> Dict[str, Any]:
+        """
+        Fetch cache information for a model from the HuggingFace accelerator.
+
+        Args:
+            model_id: HuggingFace model identifier
+
+        Returns:
+            Dictionary returned by the accelerator's get_cache_info()
+        """
+        return self.hf_accelerator.get_cache_info(model_id)
diff --git a/test_debug_input.json b/test_debug_input.json
deleted file mode 100644
index 5c8db78..0000000
--- a/test_debug_input.json
+++ /dev/null
@@ -1,8 +0,0 @@
-{
-  "input": {
-    "function_name": "debug_logging_test",
-    "function_code": "def debug_logging_test():\n    import logging\n    logger = logging.getLogger(__name__)\n    \n    # Test all log levels to verify DEBUG is shown\n    logger.debug(\"DEBUG: This should be visible when LOG_LEVEL=DEBUG\")\n    logger.info(\"INFO: This should always be visible\")\n    logger.warning(\"WARNING: This should always be visible\")\n    logger.error(\"ERROR: This should always be visible\")\n    \n    print(\"Standard output from function execution\")\n    \n    return {\n        \"message\": \"Debug logging test completed\",\n        \"current_log_level\": logging.getLogger().level,\n        \"level_name\": logging.getLevelName(logging.getLogger().level)\n    }\n",
-    "args": [],
-    "kwargs": {}
-  }  
-}
diff --git a/test_hf_input.json b/test_hf_input.json
deleted file mode 100644
index 9dd0c92..0000000
--- a/test_hf_input.json
+++ /dev/null
@@ -1,9 +0,0 @@
-{
-  "input": {
-    "function_name": "test_hf_model_download",
-    "function_code": "def test_hf_model_download():\n    import os\n    from transformers import AutoTokenizer\n    \n    # Test downloading a small model\n    model_name = 'gpt2'\n    tokenizer = AutoTokenizer.from_pretrained(model_name)\n    \n    # Verify cache environment variables are set\n    hf_home = os.environ.get('HF_HOME')\n    transformers_cache = os.environ.get('TRANSFORMERS_CACHE')\n    \n    result = {\n        'model_loaded': True,\n        'vocab_size': tokenizer.vocab_size,\n        'hf_home': hf_home,\n        'transformers_cache': transformers_cache,\n        'cache_configured': hf_home is not None and transformers_cache is not None\n    }\n    \n    return result\n",
-    "dependencies": ["transformers", "torch"],
-    "args": [],
-    "kwargs": {}
-  }  
-}
diff --git a/test_subprocess_debug.json b/test_subprocess_debug.json
deleted file mode 100644
index 4d2a028..0000000
--- a/test_subprocess_debug.json
+++ /dev/null
@@ -1,9 +0,0 @@
-{
-  "input": {
-    "function_code": "import subprocess\nimport os\nimport sys\ndef debug_subprocess_environment():\n    \"\"\"Debug subprocess environment to understand vLLM issue.\"\"\"\n    results = []\n    \n    # Check symlink status\n    app_venv_path = '/app/.venv'\n    if os.path.exists(app_venv_path):\n        if os.path.islink(app_venv_path):\n            target = os.readlink(app_venv_path)\n            results.append(f'✓ Symlink exists: {app_venv_path} -> {target}')\n        else:\n            results.append(f'✗ {app_venv_path} is not a symlink')\n    else:\n        results.append(f'✗ {app_venv_path} does not exist')\n    \n    # Check if target venv has vllm\n    try:\n        if os.path.islink(app_venv_path):\n            target = os.readlink(app_venv_path)\n            vllm_path = f'{target}/lib/python*/site-packages/vllm'\n            import glob\n            vllm_dirs = glob.glob(vllm_path)\n            if vllm_dirs:\n                results.append(f'✓ vLLM found in target venv: {vllm_dirs[0]}')\n            else:\n                results.append(f'✗ vLLM not found in target venv (searched: {vllm_path})')\n    except Exception as e:\n        results.append(f'Error checking vLLM in target: {e}')\n    \n    # Test subprocess execution with explicit environment\n    results.append('')\n    results.append('=== Subprocess Tests ===')\n    \n    # Test 1: Direct python version from symlink\n    try:\n        result = subprocess.run(\n            ['/app/.venv/bin/python3', '--version'],\n            capture_output=True, text=True, timeout=10\n        )\n        if result.returncode == 0:\n            results.append(f'✓ Python version from symlink: {result.stdout.strip()}')\n        else:\n            results.append(f'✗ Python failed: {result.stderr.strip()}')\n    except Exception as e:\n        results.append(f'✗ Python subprocess error: {e}')\n    \n    # Test 2: Check if vllm module is accessible\n    try:\n        result = subprocess.run(\n            
['/app/.venv/bin/python3', '-c', 'import vllm; print(\"vLLM import successful\")'],\n            capture_output=True, text=True, timeout=10\n        )\n        if result.returncode == 0:\n            results.append(f'✓ vLLM import from subprocess: {result.stdout.strip()}')\n        else:\n            results.append(f'✗ vLLM import failed: {result.stderr.strip()}')\n    except Exception as e:\n        results.append(f'✗ vLLM import subprocess error: {e}')\n    \n    # Test 3: Check Python path in subprocess\n    try:\n        result = subprocess.run(\n            ['/app/.venv/bin/python3', '-c', 'import sys; print(\"PYTHONPATH:\", sys.path[:3])'],\n            capture_output=True, text=True, timeout=10\n        )\n        if result.returncode == 0:\n            results.append(f'✓ Subprocess Python path: {result.stdout.strip()}')\n        else:\n            results.append(f'✗ Python path check failed: {result.stderr.strip()}')\n    except Exception as e:\n        results.append(f'✗ Python path subprocess error: {e}')\n    \n    # Test 4: Current process environment\n    results.append('')\n    results.append('=== Current Process Environment ===')\n    results.append(f'VIRTUAL_ENV: {os.environ.get(\"VIRTUAL_ENV\", \"Not set\")}')\n    results.append(f'PATH: {os.environ.get(\"PATH\", \"Not set\")[:200]}...')\n    results.append(f'Current Python path: {sys.executable}')\n    \n    return '\\n'.join(results)",
-    "function_name": "debug_subprocess_environment",
-    "args": [],
-    "kwargs": {},
-    "dependencies": ["vllm"]
-  }
-}
diff --git a/test_vllm_symlink.json b/test_vllm_symlink.json
deleted file mode 100644
index 2bd325d..0000000
--- a/test_vllm_symlink.json
+++ /dev/null
@@ -1,9 +0,0 @@
-{
-  "input": {
-    "function_code": "import subprocess\nimport os\ndef test_app_venv_symlink():\n    \"\"\"Test that /app/.venv symlink works correctly and demonstrate the fix for vLLM.\"\"\"\n    results = []\n    \n    # Check if we're running with RunPod volume\n    has_volume = os.path.exists('/runpod-volume')\n    results.append(f'RunPod volume available: {has_volume}')\n    \n    # Check if /app/.venv exists and is a symlink\n    app_venv_path = '/app/.venv'\n    if os.path.exists(app_venv_path):\n        if os.path.islink(app_venv_path):\n            target = os.readlink(app_venv_path)\n            results.append(f'SUCCESS: {app_venv_path} is symlink -> {target}')\n        else:\n            results.append(f'INFO: {app_venv_path} exists but is not a symlink (expected for local testing)')\n    else:\n        results.append(f'INFO: {app_venv_path} does not exist')\n    \n    # Test if we can access python from /app/.venv/bin/python3\n    try:\n        result = subprocess.run(['/app/.venv/bin/python3', '--version'], capture_output=True, text=True, timeout=5)\n        if result.returncode == 0:\n            results.append(f'SUCCESS: Python accessible from /app/.venv: {result.stdout.strip()}')\n        else:\n            results.append(f'ERROR: Python failed from /app/.venv: {result.stderr}')\n    except subprocess.TimeoutExpired:\n        results.append('ERROR: Python command from /app/.venv timed out')\n    except Exception as e:\n        results.append(f'INFO: Cannot run python from /app/.venv (expected for local): {str(e)}')\n    \n    # Simulate what vLLM would encounter - explain the fix\n    results.append('')\n    results.append('=== vLLM Fix Explanation ===')\n    if has_volume:\n        results.append('With RunPod volume: /app/.venv -> /runpod-volume/runtimes/{endpoint}/.venv')\n        results.append('vLLM subprocess calls to /app/.venv/bin/python3 will use volume venv')\n    else:\n        results.append('Without RunPod volume: /app/.venv is the container default 
venv')\n        results.append('This is the local testing scenario')\n    \n    return '\\n'.join(results)",
-    "function_name": "test_app_venv_symlink",
-    "args": [],
-    "kwargs": {},
-    "dependencies": []
-  }
-}
\ No newline at end of file
diff --git a/tests/integration/test_dependency_management.py b/tests/integration/test_dependency_management.py
index 16737f3..ad4e1ca 100644
--- a/tests/integration/test_dependency_management.py
+++ b/tests/integration/test_dependency_management.py
@@ -1,5 +1,5 @@
 import pytest
-from unittest.mock import patch, MagicMock
+from unittest.mock import patch, MagicMock, AsyncMock
 from remote_executor import RemoteExecutor
 from remote_execution import FunctionRequest
 
@@ -36,7 +36,6 @@ def test_install_python_dependencies_integration(self):
                 "uv",
                 "pip",
                 "install",
-                "--no-cache-dir",
                 "requests",
                 "numpy",
             ]
@@ -66,7 +65,7 @@ def test_install_system_dependencies_integration(self):
             mock_popen.side_effect = [mock_update_process, mock_install_process]
 
             result = executor.dependency_installer.install_system_dependencies(
-                ["curl", "wget"]
+                ["curl", "wget"], accelerate_downloads=False
             )
 
             assert result.success is True
@@ -113,29 +112,41 @@ def test_with_deps():
 
         with (
             patch.object(
-                executor.dependency_installer, "install_dependencies"
+                executor.dependency_installer,
+                "install_dependencies_async",
+                new_callable=AsyncMock,
             ) as mock_py_deps,
             patch.object(
-                executor.dependency_installer, "install_system_dependencies"
+                executor.dependency_installer,
+                "install_system_dependencies_async",
+                new_callable=AsyncMock,
             ) as mock_sys_deps,
             patch.object(executor.function_executor, "execute") as mock_execute,
         ):
             # Mock successful dependency installations
-            mock_sys_deps.return_value = type(
-                "obj", (object,), {"success": True, "stdout": "system deps installed"}
-            )()
-            mock_py_deps.return_value = type(
-                "obj", (object,), {"success": True, "stdout": "python deps installed"}
-            )()
+            from remote_execution import FunctionResponse
+
+            mock_sys_deps.return_value = FunctionResponse(
+                success=True, stdout="system deps installed"
+            )
+            mock_py_deps.return_value = FunctionResponse(
+                success=True, stdout="python deps installed"
+            )
             mock_execute.return_value = type(
-                "obj", (object,), {"success": True, "result": "encoded_result"}
+                "obj",
+                (object,),
+                {
+                    "success": True,
+                    "result": "encoded_result",
+                    "stdout": "function executed",
+                },
             )()
 
             result = await executor.ExecuteFunction(request)
 
             # Verify all steps were called
-            mock_sys_deps.assert_called_once_with(["curl"])
-            mock_py_deps.assert_called_once_with(["requests"])
+            mock_sys_deps.assert_called_once_with(["curl"], True)
+            mock_py_deps.assert_called_once_with(["requests"], True)
             mock_execute.assert_called_once_with(request)
 
             assert result.success is True
@@ -178,7 +189,9 @@ def test_system_dependency_update_failure(self):
             )
             mock_popen.return_value = mock_process
 
-            result = executor.dependency_installer.install_system_dependencies(["curl"])
+            result = executor.dependency_installer.install_system_dependencies(
+                ["curl"], accelerate_downloads=False
+            )
 
             assert result.success is False
             assert result.error == "Error updating package list"
@@ -198,20 +211,20 @@ async def test_dependency_failure_stops_execution(self):
 
         with (
             patch.object(
-                executor.dependency_installer, "install_dependencies"
+                executor.dependency_installer,
+                "install_dependencies_async",
+                new_callable=AsyncMock,
             ) as mock_deps,
             patch.object(executor.function_executor, "execute") as mock_execute,
         ):
             # Mock failed dependency installation
-            mock_deps.return_value = type(
-                "obj",
-                (object,),
-                {
-                    "success": False,
-                    "error": "Error installing packages",
-                    "stdout": "error details",
-                },
-            )()
+            from remote_execution import FunctionResponse
+
+            mock_deps.return_value = FunctionResponse(
+                success=False,
+                error="Error installing packages",
+                stdout="error details",
+            )
 
             result = await executor.ExecuteFunction(request)
 
@@ -220,7 +233,7 @@ async def test_dependency_failure_stops_execution(self):
 
             # Verify failure response
             assert result.success is False
-            assert result.error == "Error installing packages"
+            assert "Error installing packages" in result.error
 
     @pytest.mark.integration
     def test_empty_dependency_lists(self):
@@ -258,7 +271,6 @@ def test_dependency_command_construction(self):
                 "uv",
                 "pip",
                 "install",
-                "--no-cache-dir",
                 "package1",
                 "package2>=1.0.0",
             ]
@@ -278,7 +290,9 @@ def test_dependency_command_construction(self):
             mock_popen.side_effect = [mock_update, mock_install]
 
             # Test system dependency command
-            executor.dependency_installer.install_system_dependencies(["pkg1", "pkg2"])
+            executor.dependency_installer.install_system_dependencies(
+                ["pkg1", "pkg2"], accelerate_downloads=False
+            )
 
             install_call = mock_popen.call_args_list[1]
             expected_cmd = [
@@ -311,8 +325,180 @@ def test_exception_handling_in_dependency_installation(self):
 
             # Test system dependency exception
             sys_result = executor.dependency_installer.install_system_dependencies(
-                ["some-package"]
+                ["some-package"], accelerate_downloads=False
             )
             assert sys_result.success is False
             assert "Exception during system package installation" in sys_result.error
             assert "Subprocess error" in sys_result.error
+
+    @pytest.mark.integration
+    def test_system_dependency_installation_with_nala_acceleration(self):
+        """Test system packages install through nala when acceleration is enabled."""
+        executor = RemoteExecutor()
+
+        with patch("subprocess.Popen") as mock_popen:
+            # 1st Popen call (`which nala`): exit code 0 means nala was found
+            nala_check = MagicMock()
+            nala_check.returncode = 0
+            nala_check.communicate.return_value = (b"/usr/bin/nala", b"")
+
+            # 2nd Popen call (`nala update`): succeeds
+            nala_update = MagicMock()
+            nala_update.returncode = 0
+            nala_update.communicate.return_value = (b"Reading package lists...", b"")
+
+            # 3rd Popen call (`nala install -y build-essential`): succeeds
+            nala_install = MagicMock()
+            nala_install.returncode = 0
+            nala_install.communicate.return_value = (
+                b"Successfully installed build-essential",
+                b"",
+            )
+
+            mock_popen.side_effect = [nala_check, nala_update, nala_install]
+
+            result = executor.dependency_installer.install_system_dependencies(
+                ["build-essential"], accelerate_downloads=True
+            )
+
+            assert result.success is True
+            assert "Installed with nala acceleration" in result.stdout
+
+            # Verify exactly three commands ran, in order, with no apt-get fallback
+            calls = mock_popen.call_args_list
+            assert len(calls) == 3
+            assert calls[0][0][0] == ["which", "nala"]  # Availability check
+            assert calls[1][0][0] == ["nala", "update"]  # Update
+            assert calls[2][0][0] == [
+                "nala",
+                "install",
+                "-y",
+                "build-essential",
+            ]  # Install
+
+    @pytest.mark.integration
+    def test_system_dependency_installation_nala_fallback(self):
+        """Test fallback to apt-get when `nala update` exits with a non-zero code."""
+        executor = RemoteExecutor()
+
+        with patch("subprocess.Popen") as mock_popen:
+            # 1st Popen call: nala availability probe succeeds (exit code 0)
+            nala_check = MagicMock()
+            nala_check.returncode = 0
+            nala_check.communicate.return_value = (b"/usr/bin/nala", b"")
+
+            # 2nd Popen call: nala update fails (exit code 1)
+            nala_update = MagicMock()
+            nala_update.returncode = 1
+            nala_update.communicate.return_value = (b"", b"nala update failed")
+
+            # 3rd and 4th Popen calls: apt-get update and install both succeed
+            apt_update = MagicMock()
+            apt_update.returncode = 0
+            apt_update.communicate.return_value = (b"Reading package lists...", b"")
+
+            apt_install = MagicMock()
+            apt_install.returncode = 0
+            apt_install.communicate.return_value = (
+                b"Successfully installed python3-dev",
+                b"",
+            )
+
+            mock_popen.side_effect = [nala_check, nala_update, apt_update, apt_install]
+
+            result = executor.dependency_installer.install_system_dependencies(
+                ["python3-dev"], accelerate_downloads=True
+            )
+
+            assert result.success is True
+            assert "Installed with nala acceleration" not in result.stdout
+
+            # Calls 0-1 were the nala attempts; calls 2-3 must be the apt-get fallback
+            calls = mock_popen.call_args_list
+            assert len(calls) == 4
+            assert calls[2][0][0] == ["apt-get", "update"]  # apt-get update
+            assert calls[3][0][0] == [
+                "apt-get",
+                "install",
+                "-y",
+                "--no-install-recommends",
+                "python3-dev",
+            ]
+
+    @pytest.mark.integration
+    def test_system_dependency_installation_no_nala_available(self):
+        """Test that apt-get is used when the nala availability probe fails."""
+        executor = RemoteExecutor()
+
+        with patch("subprocess.Popen") as mock_popen:
+            # 1st Popen call: availability probe exits 1, i.e. nala was not found
+            nala_check = MagicMock()
+            nala_check.returncode = 1
+            nala_check.communicate.return_value = (b"", b"which: nala: not found")
+
+            # 2nd and 3rd Popen calls: apt-get update and install both succeed
+            apt_update = MagicMock()
+            apt_update.returncode = 0
+            apt_update.communicate.return_value = (b"Reading package lists...", b"")
+
+            apt_install = MagicMock()
+            apt_install.returncode = 0
+            apt_install.communicate.return_value = (b"Successfully installed gcc", b"")
+
+            mock_popen.side_effect = [nala_check, apt_update, apt_install]
+
+            result = executor.dependency_installer.install_system_dependencies(
+                ["gcc"], accelerate_downloads=True
+            )
+
+            assert result.success is True
+            assert "Installed with nala acceleration" not in result.stdout
+
+            # Call 0 was the probe; calls 1-2 must be the standard apt-get commands
+            calls = mock_popen.call_args_list
+            assert len(calls) == 3
+            assert calls[1][0][0] == ["apt-get", "update"]
+            assert calls[2][0][0] == [
+                "apt-get",
+                "install",
+                "-y",
+                "--no-install-recommends",
+                "gcc",
+            ]
+
+    @pytest.mark.integration
+    def test_system_dependency_installation_with_small_packages(self):
+        """Test system dependency installation with small packages (no acceleration)."""
+        executor = RemoteExecutor()
+
+        with patch("subprocess.Popen") as mock_popen:
+            # Only apt-get mocks are queued: no nala probe is expected for these packages
+            apt_update = MagicMock()
+            apt_update.returncode = 0
+            apt_update.communicate.return_value = (b"Reading package lists...", b"")
+
+            apt_install = MagicMock()
+            apt_install.returncode = 0
+            apt_install.communicate.return_value = (b"Successfully installed nano", b"")
+
+            mock_popen.side_effect = [apt_update, apt_install]
+
+            result = executor.dependency_installer.install_system_dependencies(
+                ["nano", "vim"], accelerate_downloads=True
+            )
+
+            assert result.success is True
+            assert "Installed with nala acceleration" not in result.stdout
+
+            # Exactly two Popen calls, both apt-get, even with accelerate_downloads=True
+            calls = mock_popen.call_args_list
+            assert len(calls) == 2
+            assert calls[0][0][0] == ["apt-get", "update"]
+            assert calls[1][0][0] == [
+                "apt-get",
+                "install",
+                "-y",
+                "--no-install-recommends",
+                "nano",
+                "vim",
+            ]
diff --git a/tests/integration/test_download_acceleration_integration.py b/tests/integration/test_download_acceleration_integration.py
new file mode 100644
index 0000000..1dcea96
--- /dev/null
+++ b/tests/integration/test_download_acceleration_integration.py
@@ -0,0 +1,364 @@
+"""
+Integration tests for download acceleration functionality using hf_transfer.
+"""
+
+import pytest
+import tempfile
+import shutil
+from pathlib import Path
+from unittest.mock import Mock, patch, AsyncMock
+
+from src.download_accelerator import (
+    DownloadAccelerator,
+    HfTransferDownloader,
+)
+from src.huggingface_accelerator import HuggingFaceAccelerator
+from src.dependency_installer import DependencyInstaller
+from src.workspace_manager import WorkspaceManager
+from src.remote_executor import RemoteExecutor
+from src.remote_execution import FunctionRequest
+
+
+class TestDownloadAccelerationIntegration:
+    """Integration tests for download acceleration components."""
+
+    def setup_method(self):
+        """Create a temp dir and a WorkspaceManager mock whose paths point into it."""
+        self.temp_dir = Path(tempfile.mkdtemp())
+        self.mock_workspace_manager = Mock(spec=WorkspaceManager)
+        self.mock_workspace_manager.has_runpod_volume = True
+        self.mock_workspace_manager.hf_cache_path = str(self.temp_dir / ".hf-cache")
+        self.mock_workspace_manager.workspace_path = str(self.temp_dir)
+        self.mock_workspace_manager.venv_path = str(self.temp_dir / ".venv")
+
+    def teardown_method(self):
+        """Remove the temp dir created in setup_method, ignoring removal errors."""
+        shutil.rmtree(self.temp_dir, ignore_errors=True)
+
+    @patch("src.download_accelerator.HF_TRANSFER_ENABLED", True)
+    def test_hf_transfer_availability_detection(self):
+        """Test hf_transfer_available tracks importlib.util.find_spec's result."""
+        with patch("importlib.util.find_spec") as mock_find_spec:
+            # A non-None spec must be reported as "hf_transfer available"
+            mock_find_spec.return_value = Mock()  # Not None means available
+            downloader = HfTransferDownloader()
+            assert downloader.hf_transfer_available is True
+
+            # A None spec must be reported as "hf_transfer not available"
+            mock_find_spec.return_value = None  # None means not available
+            downloader = HfTransferDownloader()
+            assert downloader.hf_transfer_available is False
+
+    def test_download_accelerator_decision_logic(self):
+        """Test should_accelerate_download: HF URLs accelerate, other hosts do not."""
+        accelerator = DownloadAccelerator(self.mock_workspace_manager)
+
+        # Force the availability flag on so only the URL/size decision is tested
+        accelerator.hf_transfer_downloader.hf_transfer_available = True
+
+        # Should accelerate large HuggingFace files
+        assert (
+            accelerator.should_accelerate_download(
+                "https://huggingface.co/model/resolve/main/large.bin", 50.0
+            )
+            is True
+        )
+
+        # Should accelerate HuggingFace URLs regardless of size
+        assert (
+            accelerator.should_accelerate_download(
+                "https://huggingface.co/model/resolve/main/file", 5.0
+            )
+            is True
+        )
+
+        # Should not accelerate non-HF files, whether the size argument is 50.0 or 1.0
+        assert (
+            accelerator.should_accelerate_download("http://example.com/large.bin", 50.0)
+            is False
+        )
+        assert (
+            accelerator.should_accelerate_download("http://example.com/small.txt", 1.0)
+            is False
+        )
+
+    @patch("src.huggingface_accelerator.HfApi.repo_info")
+    def test_hf_model_file_fetching(self, mock_repo_info):
+        """Test that get_model_files maps repo_info siblings to path/size/url dicts."""
+        # Mock successful API response using HF Hub's native API; `Mock` comes from
+        # the module-level unittest.mock import, so no local re-import is needed.
+
+        mock_repo_info_obj = Mock()
+        mock_repo_info_obj.siblings = [
+            Mock(rfilename="pytorch_model.bin", size=500 * 1024 * 1024),  # 500MB
+            Mock(rfilename="config.json", size=1024),  # 1KB
+        ]
+        mock_repo_info.return_value = mock_repo_info_obj
+
+        accelerator = HuggingFaceAccelerator(self.mock_workspace_manager)
+        files = accelerator.get_model_files("gpt2")
+
+        assert len(files) == 2
+        assert files[0]["path"] == "pytorch_model.bin"
+        assert files[0]["size"] == 500 * 1024 * 1024
+        assert "huggingface.co/gpt2/resolve/main/pytorch_model.bin" in files[0]["url"]
+
+    def test_hf_model_acceleration_decision(self):
+        """Test which model IDs should_accelerate_model selects for pre-caching."""
+        accelerator = HuggingFaceAccelerator(self.mock_workspace_manager)
+
+        # Should pre-cache known large models (HF handles acceleration automatically)
+        assert accelerator.should_accelerate_model("gpt2") is True
+        assert accelerator.should_accelerate_model("bert-base-uncased") is True
+        assert accelerator.should_accelerate_model("microsoft/DialoGPT-medium") is True
+        assert accelerator.should_accelerate_model("stable-diffusion-v1-5") is True
+
+        # An ID the accelerator does not recognize must not be pre-cached
+        assert accelerator.should_accelerate_model("unknown/tiny-model") is False
+
+    @patch("src.workspace_manager.WorkspaceManager.__init__")
+    def test_remote_executor_with_acceleration(self, mock_workspace_init):
+        """Test ExecuteFunction caches requested HF models and installs deps async."""
+        # The patched __init__ must return None, as any real __init__ does
+        mock_workspace_init.return_value = None
+
+        executor = RemoteExecutor()
+        executor.workspace_manager = self.mock_workspace_manager
+        executor.workspace_manager.has_runpod_volume = True
+        executor.workspace_manager.initialize_workspace = Mock(
+            return_value=Mock(success=True)
+        )
+        executor.workspace_manager.accelerate_model_download = Mock(
+            return_value=Mock(success=True, stdout="Model cached successfully")
+        )
+
+        # Replace the dependency installer and the async model-caching entry point
+        executor.dependency_installer = Mock()
+        executor.dependency_installer.install_system_dependencies = Mock(
+            return_value=Mock(success=True, stdout="System deps installed")
+        )
+        executor.dependency_installer.install_dependencies_async = AsyncMock(
+            return_value=Mock(success=True, stdout="Python deps installed")
+        )
+        executor.workspace_manager.accelerate_model_download_async = AsyncMock(
+            return_value=Mock(success=True, stdout="Model cached")
+        )
+        executor.dependency_installer._identify_large_packages = Mock(
+            return_value=["torch", "transformers"]
+        )
+        executor.dependency_installer.download_accelerator = Mock()
+        executor.dependency_installer.download_accelerator.hf_transfer_downloader = (
+            Mock()
+        )
+        executor.dependency_installer.download_accelerator.hf_transfer_downloader.hf_transfer_available = True
+
+        # Replace the function executor so no user code is actually run
+        executor.function_executor = Mock()
+        executor.function_executor.execute = Mock(
+            return_value=Mock(success=True, result="Function executed")
+        )
+
+        # Create request with acceleration enabled and two models to pre-cache
+        request = FunctionRequest(
+            function_name="test_function",
+            function_code="def test_function(): return 'test'",
+            dependencies=["torch", "transformers"],
+            accelerate_downloads=True,
+            hf_models_to_cache=["gpt2", "bert-base-uncased"],
+        )
+
+        # Run the async ExecuteFunction coroutine to completion
+        import asyncio
+
+        asyncio.run(executor.ExecuteFunction(request))
+
+        # Each entry of hf_models_to_cache must reach the async caching method
+        assert (
+            executor.workspace_manager.accelerate_model_download_async.call_count == 2
+        )
+        executor.workspace_manager.accelerate_model_download_async.assert_any_call(
+            "gpt2"
+        )
+        executor.workspace_manager.accelerate_model_download_async.assert_any_call(
+            "bert-base-uncased"
+        )
+
+        # Verify dependencies were installed with acceleration enabled (async method)
+        executor.dependency_installer.install_dependencies_async.assert_called_once_with(
+            ["torch", "transformers"], True
+        )
+
+    @patch.dict("os.environ", {"HF_TOKEN": "test_token"})
+    def test_hf_token_authentication(self):
+        """Smoke test: HfTransferDownloader builds with HF_TOKEN set in the env."""
+        downloader = HfTransferDownloader()
+        # Only the type of the availability flag is checked; its value may be False
+        # when hf_transfer is not installed, and that's expected behavior.
+        # NOTE(review): token-based authentication itself is not asserted here.
+        assert isinstance(downloader.hf_transfer_available, bool)
+
+    def test_strategy_selection_logic(self):
+        """Test is_file_cached: False for a missing path, True for an existing file."""
+        accelerator = DownloadAccelerator(self.mock_workspace_manager)
+        accelerator.hf_transfer_downloader.hf_transfer_available = True
+
+        # Two paths inside the temp dir; only the second one is created below
+        non_existent_file = str(self.temp_dir / "non_existent.bin")
+        existing_file = str(self.temp_dir / "existing.bin")
+
+        # Create existing file
+        Path(existing_file).write_bytes(b"existing data")
+
+        assert accelerator.is_file_cached(non_existent_file) is False
+        assert accelerator.is_file_cached(existing_file) is True
+
+    def test_fallback_behavior_without_accelerators(self):
+        """Test download_with_fallback reports failure when hf_transfer is missing."""
+        accelerator = DownloadAccelerator(self.mock_workspace_manager)
+        accelerator.hf_transfer_downloader.hf_transfer_available = False
+
+        # hf_transfer unavailable: the download is deferred to HF native handling
+        result = accelerator.download_with_fallback(
+            "https://huggingface.co/gpt2/resolve/main/file.bin",
+            str(self.temp_dir / "file.bin"),
+        )
+
+        # A failed response whose error text names the deferral is the expected outcome
+        assert result.success is False
+        assert "defer to HF native handling" in result.error
+
+    @patch("src.dependency_installer.subprocess.Popen")
+    def test_dependency_installation_without_acceleration(self, mock_popen):
+        """Test plain package installation: one Popen call containing every package."""
+        # Mock successful installation
+        mock_process = Mock()
+        mock_process.returncode = 0
+        mock_process.communicate.return_value = (b"Installed successfully", b"")
+        mock_popen.return_value = mock_process
+
+        installer = DependencyInstaller(self.mock_workspace_manager)
+
+        # Install packages without passing any acceleration argument
+        packages = ["torch==2.0.0", "transformers>=4.20.0"]
+        result = installer.install_dependencies(packages)
+
+        assert result.success is True
+
+        # Exactly one subprocess was spawned and its command lists both packages
+        mock_popen.assert_called_once()
+        args, _ = mock_popen.call_args
+        assert set(packages).issubset(args[0])
+
+    @patch("src.hf_downloader_tetra.DownloadAccelerator")
+    def test_model_cache_management(self, mock_download_accelerator):
+        """Test model cache information and management using tetra strategy."""
+        accelerator = HuggingFaceAccelerator(self.mock_workspace_manager)
+
+        # Test cache info for non-existent model
+        cache_info = accelerator.get_cache_info("non-existent-model")
+        assert cache_info["cached"] is False
+        assert cache_info["cache_size_mb"] == 0
+        assert cache_info["file_count"] == 0
+
+        # Create mock cache files for existing model
+        model_cache_dir = self.temp_dir / ".hf-cache" / "transformers" / "gpt2"
+        model_cache_dir.mkdir(parents=True, exist_ok=True)
+
+        # Create mock model files
+        config_file = model_cache_dir / "config.json"
+        model_file = model_cache_dir / "pytorch_model.bin"
+
+        config_file.write_text('{"model_type": "gpt2"}')  # 22 bytes
+        model_file.write_bytes(b"0" * (150 * 1024 * 1024))  # 150 MiB of ASCII "0"
+
+        # Test cache info for cached model
+        cache_info = accelerator.get_cache_info("gpt2")
+        assert cache_info["cached"] is True
+        assert (
+            abs(cache_info["cache_size_mb"] - 150.0) < 0.1
+        )  # Tolerance absorbs the few extra bytes of config.json
+        assert cache_info["file_count"] == 2
+
+        # Test cache clearing
+        result = accelerator.clear_model_cache("gpt2")
+        assert result.success is True
+        assert not model_cache_dir.exists()
+
+
+class TestDownloadAccelerationErrorHandling:
+    """Test error handling and edge cases in download acceleration."""
+
+    def setup_method(self):
+        """Create a fresh temporary directory for each test."""
+        self.temp_dir = Path(tempfile.mkdtemp())
+
+    def teardown_method(self):
+        """Remove the temporary directory, ignoring removal errors."""
+        shutil.rmtree(self.temp_dir, ignore_errors=True)
+
+    def test_hf_transfer_download_failure_fallback(self):
+        """Test that download fails or raises when hf_transfer is unavailable."""
+        downloader = HfTransferDownloader()
+
+        # NOTE(review): nothing is asserted when hf_transfer is available
+        if not downloader.hf_transfer_available:
+            try:
+                result = downloader.download(
+                    "https://huggingface.co/gpt2/resolve/main/file.bin",
+                    str(self.temp_dir / "file.bin"),
+                )
+                assert not result.success
+            except RuntimeError as e:
+                assert "hf_transfer not available" in str(e)
+
+    @patch("src.huggingface_accelerator.HfApi.repo_info")
+    def test_hf_api_failure_handling(self, mock_repo_info):
+        """Test handling of HuggingFace API failures."""
+        # Make every repo_info call raise
+        mock_repo_info.side_effect = Exception("API error")
+
+        accelerator = HuggingFaceAccelerator(None)
+        files = accelerator.get_model_files("gpt2")
+
+        # Should return empty list on failure
+        assert files == []
+
+    def test_invalid_model_acceleration(self):
+        """Test acceleration with invalid model specifications."""
+        mock_workspace = Mock()
+        mock_workspace.has_runpod_volume = True
+        mock_workspace.hf_cache_path = str(self.temp_dir)
+
+        accelerator = HuggingFaceAccelerator(mock_workspace)
+
+        # Empty model ID: expect success plus a "no acceleration needed" message
+        result = accelerator.accelerate_model_download("")
+        assert result.success is True
+        assert result.stdout is not None
+        assert "does not require acceleration" in result.stdout
+
+    def test_non_hf_url_handling(self):
+        """Test handling of non-HuggingFace URLs."""
+        downloader = HfTransferDownloader()
+
+        # Test error handling for non-HF URLs when downloader is available
+        if downloader.hf_transfer_available:
+            result = downloader.download(
+                "http://example.com/file.bin", str(self.temp_dir / "file.bin")
+            )
+            assert result.success is False
+            assert result.error_message is not None
+            assert "only supports HuggingFace URLs" in result.error_message
+        else:
+            # When not available, a failed result or a RuntimeError is accepted
+            try:
+                result = downloader.download(
+                    "http://example.com/file.bin", str(self.temp_dir / "file.bin")
+                )
+                assert not result.success
+            except RuntimeError as e:
+                assert "hf_transfer not available" in str(e)
+
+
+if __name__ == "__main__":
+    pytest.main([__file__])
diff --git a/tests/integration/test_handler_integration.py b/tests/integration/test_handler_integration.py
index 592bce7..f12bc4b 100644
--- a/tests/integration/test_handler_integration.py
+++ b/tests/integration/test_handler_integration.py
@@ -13,7 +13,7 @@ class TestHandlerIntegration:
 
     def setup_method(self):
         """Setup for each test method."""
-        self.test_data_dir = Path(__file__).parent.parent.parent
+        self.test_data_dir = Path(__file__).parent.parent.parent / "src"
         self.test_input_file = self.test_data_dir / "test_input.json"
         self.test_class_input_file = self.test_data_dir / "test_class_input.json"
 
diff --git a/tests/integration/test_hf_strategy_integration.py b/tests/integration/test_hf_strategy_integration.py
new file mode 100644
index 0000000..dd07bcf
--- /dev/null
+++ b/tests/integration/test_hf_strategy_integration.py
@@ -0,0 +1,162 @@
+"""
+Integration tests for HuggingFace download strategy system.
+"""
+
+import os
+import pytest
+from unittest.mock import Mock, patch
+
+from src.huggingface_accelerator import HuggingFaceAccelerator
+from src.hf_strategy_factory import HFStrategyFactory
+from hf_downloader_tetra import TetraHFDownloader
+from hf_downloader_native import NativeHFDownloader
+
+
+@pytest.fixture
+def mock_workspace_manager():
+    """Provide a Mock workspace manager with a fixed hf_cache_path."""
+    workspace_manager = Mock()
+    workspace_manager.hf_cache_path = "/tmp/test_cache"
+    return workspace_manager
+
+
+class TestHuggingFaceAcceleratorIntegration:
+    """Integration tests for HuggingFaceAccelerator with strategy pattern."""
+
+    def test_accelerator_uses_configured_strategy(self, mock_workspace_manager):
+        """Test that accelerator uses the configured strategy."""
+        # patch.dict restores os.environ on exit, so the override cannot leak
+        tetra_env = {HFStrategyFactory.STRATEGY_ENV_VAR: "tetra"}
+        with patch.dict(os.environ, tetra_env):
+            with patch("src.hf_downloader_tetra.DownloadAccelerator"):
+                accelerator = HuggingFaceAccelerator(mock_workspace_manager)
+                assert isinstance(accelerator.strategy, TetraHFDownloader)
+
+    def test_accelerator_strategy_delegation(self, mock_workspace_manager):
+        """Test that accelerator properly delegates to strategy methods."""
+        # Native strategy keeps setup simple; the env override ends with the block
+        native_env = {HFStrategyFactory.STRATEGY_ENV_VAR: "native"}
+        with patch.dict(os.environ, native_env):
+            accelerator = HuggingFaceAccelerator(mock_workspace_manager)
+
+        # Mock the strategy methods
+        accelerator.strategy.should_accelerate = Mock(return_value=True)
+        accelerator.strategy.download_model = Mock(return_value=Mock(success=True))
+        accelerator.strategy.is_model_cached = Mock(return_value=False)
+        accelerator.strategy.get_cache_info = Mock(return_value={"cached": False})
+        accelerator.strategy.clear_model_cache = Mock(return_value=Mock(success=True))
+
+        # Test delegation
+        assert accelerator.should_accelerate_model("gpt2")
+        accelerator.strategy.should_accelerate.assert_called_once_with("gpt2")
+
+        accelerator.accelerate_model_download("gpt2", "main")
+        accelerator.strategy.download_model.assert_called_once_with("gpt2", "main")
+
+        assert not accelerator.is_model_cached("gpt2", "main")
+        accelerator.strategy.is_model_cached.assert_called_once_with("gpt2", "main")
+
+        cache_info = accelerator.get_cache_info("gpt2")
+        assert cache_info == {"cached": False}
+        accelerator.strategy.get_cache_info.assert_called_once_with("gpt2")
+
+        accelerator.clear_model_cache("gpt2")
+        accelerator.strategy.clear_model_cache.assert_called_once_with("gpt2")
+
+    def test_accelerator_strategy_switching(self, mock_workspace_manager):
+        """Test runtime strategy switching."""
+        # Start with native; patch.dict also rolls back what set_strategy writes
+        native_env = {HFStrategyFactory.STRATEGY_ENV_VAR: "native"}
+        with patch.dict(os.environ, native_env):
+            accelerator = HuggingFaceAccelerator(mock_workspace_manager)
+            assert isinstance(accelerator.strategy, NativeHFDownloader)
+
+            # Switch to tetra strategy
+            with patch("src.hf_downloader_tetra.DownloadAccelerator"):
+                accelerator.set_strategy("tetra")
+                assert isinstance(accelerator.strategy, TetraHFDownloader)
+
+            # Check environment was updated
+            assert os.environ[HFStrategyFactory.STRATEGY_ENV_VAR] == "tetra"
+
+    def test_accelerator_get_strategy_info(self, mock_workspace_manager):
+        """Test getting strategy information from accelerator."""
+        native_env = {HFStrategyFactory.STRATEGY_ENV_VAR: "native"}
+        with patch.dict(os.environ, native_env):
+            accelerator = HuggingFaceAccelerator(mock_workspace_manager)
+            info = accelerator.get_strategy_info()
+
+        assert info["current_strategy"] == "native"
+        assert info["strategy_instance"] == "NativeHFDownloader"
+        assert info["environment_variable"] == HFStrategyFactory.STRATEGY_ENV_VAR
+
+
+class TestStrategyEnvironmentIntegration:
+    """Test environment variable integration across the system."""
+
+    def test_strategy_persistence_across_instances(self, mock_workspace_manager):
+        """Test that strategy setting persists across new instances."""
+        # Snapshot os.environ so anything set_strategy writes is rolled back
+        with patch.dict(os.environ):
+            HFStrategyFactory.set_strategy("tetra")
+            # Create first instance
+            with patch("src.hf_downloader_tetra.DownloadAccelerator"):
+                accelerator1 = HuggingFaceAccelerator(mock_workspace_manager)
+                assert isinstance(accelerator1.strategy, TetraHFDownloader)
+
+            # Create second instance - should use same strategy
+            with patch("src.hf_downloader_tetra.DownloadAccelerator"):
+                accelerator2 = HuggingFaceAccelerator(mock_workspace_manager)
+                assert isinstance(accelerator2.strategy, TetraHFDownloader)
+
+    def test_invalid_strategy_fallback(self, mock_workspace_manager):
+        """Test fallback behavior with invalid strategy."""
+        # Set invalid strategy only for this test; patch.dict restores the env
+        invalid_env = {HFStrategyFactory.STRATEGY_ENV_VAR: "invalid_strategy"}
+        with patch.dict(os.environ, invalid_env):
+            with patch("src.hf_downloader_tetra.DownloadAccelerator"):
+                accelerator = HuggingFaceAccelerator(mock_workspace_manager)
+                # Should fallback to tetra (default)
+                assert isinstance(accelerator.strategy, TetraHFDownloader)
+
+    def test_no_env_var_uses_default(self, mock_workspace_manager):
+        """Test default strategy when no environment variable is set."""
+        # Clear the variable inside patch.dict so any prior value is restored
+        with patch.dict(os.environ):
+            os.environ.pop(HFStrategyFactory.STRATEGY_ENV_VAR, None)
+
+            with patch("src.hf_downloader_tetra.DownloadAccelerator"):
+                accelerator = HuggingFaceAccelerator(mock_workspace_manager)
+                # Should use default (tetra)
+                assert isinstance(accelerator.strategy, TetraHFDownloader)
+
+
+class TestWorkspaceManagerIntegration:
+    """Test integration with workspace manager."""
+
+    def test_strategy_uses_workspace_cache_path(self):
+        """Test that strategies use workspace manager's cache path."""
+        import tempfile
+
+        with tempfile.TemporaryDirectory() as temp_dir:
+            workspace_manager = Mock()
+            workspace_manager.hf_cache_path = temp_dir
+
+            # Tetra strategy: cache_dir must equal the workspace cache path
+            with patch("src.hf_downloader_tetra.DownloadAccelerator"):
+                tetra_strategy = TetraHFDownloader(workspace_manager)
+                assert str(tetra_strategy.cache_dir) == temp_dir
+
+            # Native strategy: only checked for keeping the workspace_manager
+            native_strategy = NativeHFDownloader(workspace_manager)
+            assert native_strategy.workspace_manager == workspace_manager
+
+    def test_strategy_with_no_cache_path(self):
+        """Test strategy behavior when workspace manager has no cache path."""
+        workspace_manager = Mock()
+        workspace_manager.hf_cache_path = None
+
+        with patch("src.hf_downloader_tetra.DownloadAccelerator"):
+            tetra_strategy = TetraHFDownloader(workspace_manager)
+            # Fallback path is only checked for containing "huggingface"
+            assert "huggingface" in str(tetra_strategy.cache_dir)
diff --git a/tests/integration/test_runpod_volume_integration.py b/tests/integration/test_runpod_volume_integration.py
index 6a81843..64ae524 100644
--- a/tests/integration/test_runpod_volume_integration.py
+++ b/tests/integration/test_runpod_volume_integration.py
@@ -4,16 +4,31 @@
 import base64
 import cloudpickle
 import threading
-from unittest.mock import Mock, patch
+from unittest.mock import Mock, patch, MagicMock
 
-from handler import RemoteExecutor, handler
-from remote_execution import FunctionResponse
-from constants import RUNPOD_VOLUME_PATH, VENV_DIR_NAME, RUNTIMES_DIR_NAME
+from src.handler import RemoteExecutor, handler
+from src.remote_execution import FunctionResponse
+from src.constants import RUNPOD_VOLUME_PATH, VENV_DIR_NAME, RUNTIMES_DIR_NAME
 
 
 class TestFullWorkflowWithVolume:
     """Test complete request workflows with volume integration."""
 
+    def setup_method(self):
+        # Replace subprocess.run with a mock for every test; undone in teardown_method
+        class ContextManagerMock(MagicMock):
+            def __enter__(self):
+                return self
+
+            def __exit__(self, exc_type, exc_val, exc_tb):
+                pass
+
+        self.subprocess_run_patcher = patch("subprocess.run", new=ContextManagerMock())
+        self.subprocess_run_patcher.start()
+
+    def teardown_method(self):
+        self.subprocess_run_patcher.stop()
+
     @patch("os.makedirs")
     @patch("workspace_manager.WorkspaceManager._validate_virtual_environment")
     @patch("os.path.exists")
@@ -80,8 +95,15 @@ def numpy_test():
 
             # Should have installed dependencies
             assert mock_popen.called
-            install_command = mock_popen.call_args[0][0]
-            assert "numpy==1.21.0" in " ".join(install_command)
+            # Check that a uv pip install command was made with numpy
+            popen_calls = [call[0][0] for call in mock_popen.call_args_list]
+            install_calls = [
+                call
+                for call in popen_calls
+                if "uv" in call and "pip" in call and "install" in call
+            ]
+            assert len(install_calls) > 0
+            assert any("numpy==1.21.0" in " ".join(call) for call in install_calls)
 
     @patch("os.makedirs")
     @patch("workspace_manager.WorkspaceManager._validate_virtual_environment")
@@ -142,10 +164,21 @@ async def test_workflow_with_system_dependencies(
             b"",
         )
 
+        # Mock subprocess calls in order:
+        # 1. which nala (system package acceleration check)
+        # 2. apt-get update
+        # 3. apt-get install
+        # 4. uv pip list (get installed packages)
+        # 5. uv pip install
+        nala_check_process = Mock()
+        nala_check_process.returncode = 1  # nala not available
+        nala_check_process.communicate.return_value = (b"", b"which: nala: not found")
+
         mock_popen.side_effect = [
+            nala_check_process,
             apt_update_process,
             apt_install_process,
-            pip_list_process,  # Added missing call
+            pip_list_process,
             pip_install_process,
         ]
 
@@ -161,12 +194,12 @@ async def test_workflow_with_system_dependencies(
                         "function_code": """
 def system_test():
     import subprocess
-    result = subprocess.run(['which', 'curl'], capture_output=True, text=True)
+    result = subprocess.run(['which', 'wget'], capture_output=True, text=True)
     return result.stdout.strip()
 """,
                         "args": [],
                         "kwargs": {},
-                        "system_dependencies": ["curl"],
+                        "system_dependencies": ["wget"],
                         "dependencies": ["requests==2.25.1"],
                     }
                 }
@@ -177,17 +210,35 @@ def system_test():
                 assert result["success"] is True
 
                 # Should have called apt-get update and install
-                calls = [call[0][0] for call in mock_popen.call_args_list]
-        assert any("apt-get" in " ".join(call) and "update" in call for call in calls)
-        assert any("apt-get" in " ".join(call) and "curl" in call for call in calls)
-        assert any(
-            "uv" in call and "requests==2.25.1" in " ".join(call) for call in calls
-        )
+                popen_calls = [call[0][0] for call in mock_popen.call_args_list]
+                assert any(
+                    "apt-get" in " ".join(call) and "wget" in " ".join(call)
+                    for call in popen_calls
+                )
+                assert any(
+                    "uv" in " ".join(call) and "requests==2.25.1" in " ".join(call)
+                    for call in popen_calls
+                )
 
 
 class TestConcurrentRequests:
     """Test realistic concurrent access scenarios."""
 
+    def setup_method(self):
+        # Replace subprocess.run with a mock for every test; undone in teardown_method
+        class ContextManagerMock(MagicMock):
+            def __enter__(self):
+                return self
+
+            def __exit__(self, exc_type, exc_val, exc_tb):
+                pass
+
+        self.subprocess_run_patcher = patch("subprocess.run", new=ContextManagerMock())
+        self.subprocess_run_patcher.start()
+
+    def teardown_method(self):
+        self.subprocess_run_patcher.stop()
+
     @patch("os.makedirs")
     @patch("workspace_manager.WorkspaceManager._validate_virtual_environment")
     @patch("os.path.exists")
@@ -331,6 +382,21 @@ def install_deps(executor, packages):
 class TestMixedExecution:
     """Test mixed volume and non-volume execution scenarios."""
 
+    def setup_method(self):
+        # Replace subprocess.run with a mock for every test; undone in teardown_method
+        class ContextManagerMock(MagicMock):
+            def __enter__(self):
+                return self
+
+            def __exit__(self, exc_type, exc_val, exc_tb):
+                pass
+
+        self.subprocess_run_patcher = patch("subprocess.run", new=ContextManagerMock())
+        self.subprocess_run_patcher.start()
+
+    def teardown_method(self):
+        self.subprocess_run_patcher.stop()
+
     @patch("os.makedirs")
     @patch("workspace_manager.WorkspaceManager._validate_virtual_environment")
     @patch("os.path.exists")
@@ -395,11 +461,10 @@ async def test_fallback_on_volume_initialization_failure(
         )  # Volume exists but venv doesn't exist
 
         # Mock file operations
-        mock_file = Mock()
+        mock_file = MagicMock()
         mock_file.fileno.return_value = 3
         mock_open.return_value.__enter__.return_value = mock_file
 
-        # Mock failed virtual environment creation
         mock_process = Mock()
         mock_process.returncode = 1
         mock_process.communicate.return_value = (b"", b"Failed to create venv")
@@ -426,6 +491,21 @@ async def test_fallback_on_volume_initialization_failure(
 class TestErrorHandlingIntegration:
     """Test error handling in integrated volume scenarios."""
 
+    def setup_method(self):
+        # Replace subprocess.run with a mock for every test; undone in teardown_method
+        class ContextManagerMock(MagicMock):
+            def __enter__(self):
+                return self
+
+            def __exit__(self, exc_type, exc_val, exc_tb):
+                pass
+
+        self.subprocess_run_patcher = patch("subprocess.run", new=ContextManagerMock())
+        self.subprocess_run_patcher.start()
+
+    def teardown_method(self):
+        self.subprocess_run_patcher.stop()
+
     @patch("os.makedirs")
     @patch("workspace_manager.WorkspaceManager._validate_virtual_environment")
     @patch("os.path.exists")
diff --git a/tests/unit/test_dependency_installer.py b/tests/unit/test_dependency_installer.py
index d3760c2..6911f64 100644
--- a/tests/unit/test_dependency_installer.py
+++ b/tests/unit/test_dependency_installer.py
@@ -30,7 +30,9 @@ def test_install_system_dependencies_success(self, mock_popen):
 
         mock_popen.side_effect = [update_process, install_process]
 
-        result = self.installer.install_system_dependencies(["curl", "wget"])
+        result = self.installer.install_system_dependencies(
+            ["curl", "wget"], accelerate_downloads=False
+        )
 
         assert result.success is True
         assert "Installed packages" in result.stdout
@@ -45,7 +47,9 @@ def test_install_system_dependencies_update_failure(self, mock_popen):
 
         mock_popen.return_value = update_process
 
-        result = self.installer.install_system_dependencies(["curl"])
+        result = self.installer.install_system_dependencies(
+            ["curl"], accelerate_downloads=False
+        )
 
         assert result.success is False
         assert "Error updating package list" in result.error
@@ -103,6 +107,75 @@ def test_install_dependencies_empty_list(self):
         assert result.success is True
         assert "No packages to install" in result.stdout
 
+    @patch("subprocess.Popen")
+    @patch("importlib.invalidate_caches")
+    def test_install_dependencies_with_acceleration_enabled(
+        self, mock_invalidate, mock_popen
+    ):
+        """Test that `uv pip install` is used when accelerate_downloads=True."""
+        process = Mock()
+        process.returncode = 0
+        process.communicate.return_value = (b"Successfully installed with UV", b"")
+        mock_popen.return_value = process
+
+        result = self.installer.install_dependencies(
+            ["requests", "numpy"], accelerate_downloads=True
+        )
+
+        assert result.success is True
+        assert "Successfully installed with UV" in result.stdout
+        # Exactly one Popen call, and its argv must begin with: uv pip install
+        mock_popen.assert_called_once()
+        args = mock_popen.call_args[0][0]
+        assert args[0] == "uv"
+        assert args[1] == "pip"
+        assert args[2] == "install"
+        mock_invalidate.assert_called_once()
+
+    @patch("subprocess.Popen")
+    @patch("importlib.invalidate_caches")
+    def test_install_dependencies_with_acceleration_disabled(
+        self, mock_invalidate, mock_popen
+    ):
+        """Test that `uv pip install` is used when accelerate_downloads=False."""
+        process = Mock()
+        process.returncode = 0
+        process.communicate.return_value = (b"Successfully installed with UV", b"")
+        mock_popen.return_value = process
+
+        result = self.installer.install_dependencies(
+            ["requests", "numpy"], accelerate_downloads=False
+        )
+
+        assert result.success is True
+        assert "Successfully installed with UV" in result.stdout
+        # Exactly one Popen call, and its argv must begin with: uv pip install
+        mock_popen.assert_called_once()
+        args = mock_popen.call_args[0][0]
+        assert args[0] == "uv"
+        assert args[1] == "pip"
+        assert args[2] == "install"
+        mock_invalidate.assert_called_once()
+
+    @patch("subprocess.Popen")
+    def test_install_dependencies_uv_failure(self, mock_popen):
+        """Test that a non-zero uv exit code is reported as an install failure."""
+        process = Mock()
+        process.returncode = 1
+        process.communicate.return_value = (b"", b"Package not found")
+        mock_popen.return_value = process
+
+        result = self.installer.install_dependencies(
+            ["nonexistent-package"], accelerate_downloads=False
+        )
+
+        assert result.success is False
+        assert "Error installing packages" in result.error
+        # The failing command must still have started with: uv pip
+        args = mock_popen.call_args[0][0]
+        assert args[0] == "uv"
+        assert args[1] == "pip"
+
 
 class TestDifferentialInstallation:
     """Test differential package installation with volume."""
@@ -171,3 +244,212 @@ def test_skip_already_installed_packages(self, mock_popen, mock_exists):
 
         assert result.success is True
         assert "All packages already installed" in result.stdout
+
+
+class TestSystemPackageAcceleration:
+    """Test system package acceleration with nala."""
+
+    def setup_method(self):
+        """Setup for each test method."""
+        self.workspace_manager = Mock(spec=WorkspaceManager)
+        self.installer = DependencyInstaller(self.workspace_manager)
+
+    @patch("subprocess.Popen")
+    def test_nala_availability_check_available(self, mock_popen):
+        """Test that nala is detected as available and the probe result is cached."""
+        process = Mock()
+        process.returncode = 0
+        process.communicate.return_value = (b"/usr/bin/nala", b"")
+        mock_popen.return_value = process
+
+        # First call should check availability
+        assert self.installer._check_nala_available() is True
+
+        # Second call should use cached result
+        assert self.installer._check_nala_available() is True
+
+        # Should only call subprocess once due to caching
+        assert mock_popen.call_count == 1
+
+    @patch("subprocess.Popen")
+    def test_nala_availability_check_unavailable(self, mock_popen):
+        """Test nala availability detection when nala is not available."""
+        process = Mock()
+        process.returncode = 1
+        process.communicate.return_value = (b"", b"which: nala: not found")
+        mock_popen.return_value = process
+
+        assert self.installer._check_nala_available() is False
+
+    @patch("subprocess.Popen")
+    def test_nala_availability_check_exception(self, mock_popen):
+        """Test nala availability detection when subprocess raises exception."""
+        mock_popen.side_effect = Exception("Command failed")
+
+        assert self.installer._check_nala_available() is False
+
+    def test_identify_large_system_packages(self):
+        """Test identification of large system packages."""
+        packages = ["build-essential", "curl", "python3-dev", "nano", "gcc"]
+        large_packages = self.installer._identify_large_system_packages(packages)
+        # "nano" is the only input that is not expected to be flagged as large
+        expected = ["build-essential", "curl", "python3-dev", "gcc"]
+        assert set(large_packages) == set(expected)
+
+    def test_identify_large_system_packages_empty(self):
+        """Test identification when no large packages are present."""
+        packages = ["nano", "vim", "htop"]
+        large_packages = self.installer._identify_large_system_packages(packages)
+
+        assert large_packages == []
+
+    @patch("subprocess.Popen")
+    def test_install_system_with_nala_success(self, mock_popen):
+        """Test successful system package installation with nala."""
+        # Mock nala update
+        update_process = Mock()
+        update_process.returncode = 0
+        update_process.communicate.return_value = (b"Updated with nala", b"")
+
+        # Mock nala install
+        install_process = Mock()
+        install_process.returncode = 0
+        install_process.communicate.return_value = (b"Installed with nala", b"")
+
+        mock_popen.side_effect = [update_process, install_process]
+
+        result = self.installer._install_system_with_nala(["build-essential"])
+
+        assert result.success is True
+        assert "Installed with nala acceleration" in result.stdout
+        assert mock_popen.call_count == 2
+
+    @patch("subprocess.Popen")
+    def test_install_system_with_nala_update_failure_fallback(self, mock_popen):
+        """Test nala installation fallback when update fails."""
+        # Mock failed nala update
+        update_process = Mock()
+        update_process.returncode = 1
+        update_process.communicate.return_value = (b"", b"Update failed")
+
+        # Mock successful apt-get operations for fallback
+        apt_update_process = Mock()
+        apt_update_process.returncode = 0
+        apt_update_process.communicate.return_value = (b"Updated", b"")
+
+        apt_install_process = Mock()
+        apt_install_process.returncode = 0
+        apt_install_process.communicate.return_value = (b"Installed", b"")
+
+        mock_popen.side_effect = [
+            update_process,
+            apt_update_process,
+            apt_install_process,
+        ]
+
+        result = self.installer._install_system_with_nala(["build-essential"])
+
+        assert result.success is True
+        assert "Installed with nala acceleration" not in result.stdout
+
+    @patch("subprocess.Popen")
+    def test_install_system_with_nala_install_failure_fallback(self, mock_popen):
+        """Test nala installation fallback when install fails."""
+        # Mock successful nala update
+        update_process = Mock()
+        update_process.returncode = 0
+        update_process.communicate.return_value = (b"Updated", b"")
+
+        # Mock failed nala install
+        install_process = Mock()
+        install_process.returncode = 1
+        install_process.communicate.return_value = (b"", b"Install failed")
+
+        # Mock successful apt-get operations for fallback
+        apt_update_process = Mock()
+        apt_update_process.returncode = 0
+        apt_update_process.communicate.return_value = (b"Updated", b"")
+
+        apt_install_process = Mock()
+        apt_install_process.returncode = 0
+        apt_install_process.communicate.return_value = (b"Installed", b"")
+
+        mock_popen.side_effect = [
+            update_process,
+            install_process,
+            apt_update_process,
+            apt_install_process,
+        ]
+
+        result = self.installer._install_system_with_nala(["build-essential"])
+
+        assert result.success is True
+        assert "Installed with nala acceleration" not in result.stdout
+
+    @patch("subprocess.Popen")
+    def test_install_system_dependencies_with_acceleration(self, mock_popen):
+        """Test system dependency installation with acceleration enabled."""
+        # Mock nala availability check
+        nala_check = Mock()
+        nala_check.returncode = 0
+        nala_check.communicate.return_value = (b"/usr/bin/nala", b"")
+
+        # Mock nala operations
+        nala_update = Mock()
+        nala_update.returncode = 0
+        nala_update.communicate.return_value = (b"Updated", b"")
+
+        nala_install = Mock()
+        nala_install.returncode = 0
+        nala_install.communicate.return_value = (b"Installed with nala", b"")
+
+        mock_popen.side_effect = [nala_check, nala_update, nala_install]
+
+        result = self.installer.install_system_dependencies(
+            ["build-essential", "python3-dev"], accelerate_downloads=True
+        )
+
+        assert result.success is True
+        assert "Installed with nala acceleration" in result.stdout
+
+    @patch("subprocess.Popen")
+    def test_install_system_dependencies_without_acceleration(self, mock_popen):
+        """Test system dependency installation with acceleration disabled."""
+        # Mock apt-get operations
+        apt_update = Mock()
+        apt_update.returncode = 0
+        apt_update.communicate.return_value = (b"Updated", b"")
+
+        apt_install = Mock()
+        apt_install.returncode = 0
+        apt_install.communicate.return_value = (b"Installed", b"")
+
+        mock_popen.side_effect = [apt_update, apt_install]
+
+        result = self.installer.install_system_dependencies(
+            ["build-essential"], accelerate_downloads=False
+        )
+
+        assert result.success is True
+        assert "Installed with nala acceleration" not in result.stdout
+
+    @patch("subprocess.Popen")
+    def test_install_system_dependencies_no_large_packages(self, mock_popen):
+        """Test system dependency installation when no large packages are present."""
+        # Mock apt-get operations (should fallback to standard)
+        apt_update = Mock()
+        apt_update.returncode = 0
+        apt_update.communicate.return_value = (b"Updated", b"")
+
+        apt_install = Mock()
+        apt_install.returncode = 0
+        apt_install.communicate.return_value = (b"Installed", b"")
+
+        mock_popen.side_effect = [apt_update, apt_install]
+
+        result = self.installer.install_system_dependencies(
+            ["nano", "vim"], accelerate_downloads=True
+        )
+
+        assert result.success is True
+        assert "Installed with nala acceleration" not in result.stdout
diff --git a/tests/unit/test_hf_download_strategies.py b/tests/unit/test_hf_download_strategies.py
new file mode 100644
index 0000000..898ab17
--- /dev/null
+++ b/tests/unit/test_hf_download_strategies.py
@@ -0,0 +1,260 @@
+"""
+Unit tests for HuggingFace download strategies.
+"""
+
+import os
+import pytest
+from unittest.mock import Mock, patch
+
+from src.hf_downloader_tetra import TetraHFDownloader
+from src.hf_downloader_native import NativeHFDownloader
+from src.hf_strategy_factory import HFStrategyFactory
+from src.remote_execution import FunctionResponse
+
+
+@pytest.fixture
+def mock_workspace_manager():
+    """Provide a Mock workspace manager with a preset HF cache path."""
+    # Any attribute other than hf_cache_path is auto-created by Mock on access
+    stub = Mock(hf_cache_path="/tmp/test_cache")
+    return stub
+
+
+@pytest.fixture
+def mock_download_accelerator():
+    """Provide a Mock accelerator whose hf_transfer downloader is available."""
+    hf_transfer = Mock(hf_transfer_available=True)
+    stub = Mock(hf_transfer_downloader=hf_transfer)
+    # NOTE(review): no test in this module requests this fixture - confirm use
+    return stub
+
+
+class TestHFStrategyFactory:
+    """Tests for HF strategy factory (os.environ is restored after each test)."""
+
+    @pytest.fixture(autouse=True)
+    def _restore_environ(self):
+        """Snapshot os.environ so strategy settings cannot leak between tests."""
+        with patch.dict(os.environ):
+            yield
+
+    def test_get_available_strategies(self):
+        """Test getting available strategies."""
+        strategies = HFStrategyFactory.get_available_strategies()
+        assert HFStrategyFactory.TETRA_STRATEGY in strategies
+        assert HFStrategyFactory.NATIVE_STRATEGY in strategies
+
+    def test_get_configured_strategy_default(self):
+        """Test default strategy when no env var set (removal is undone later)."""
+        os.environ.pop(HFStrategyFactory.STRATEGY_ENV_VAR, None)
+        strategy = HFStrategyFactory.get_configured_strategy()
+        assert strategy == HFStrategyFactory.DEFAULT_STRATEGY
+
+    def test_get_configured_strategy_from_env(self):
+        """Test getting strategy from environment variable."""
+        os.environ[HFStrategyFactory.STRATEGY_ENV_VAR] = "tetra"
+        strategy = HFStrategyFactory.get_configured_strategy()
+        assert strategy == "tetra"
+
+    def test_get_configured_strategy_invalid_fallback(self):
+        """Test fallback to default for invalid strategy."""
+        os.environ[HFStrategyFactory.STRATEGY_ENV_VAR] = "invalid_strategy"
+        strategy = HFStrategyFactory.get_configured_strategy()
+        assert strategy == HFStrategyFactory.DEFAULT_STRATEGY
+
+    def test_create_tetra_strategy(self, mock_workspace_manager):
+        """Test creating tetra strategy."""
+        with patch("src.hf_strategy_factory.TetraHFDownloader") as mock_tetra:
+            mock_instance = Mock()
+            mock_tetra.return_value = mock_instance
+            strategy = HFStrategyFactory.create_strategy(
+                mock_workspace_manager, HFStrategyFactory.TETRA_STRATEGY
+            )
+
+            mock_tetra.assert_called_once_with(mock_workspace_manager)
+            assert strategy == mock_instance
+
+    def test_create_native_strategy(self, mock_workspace_manager):
+        """Test creating native strategy."""
+        with patch("src.hf_strategy_factory.NativeHFDownloader") as mock_native:
+            mock_instance = Mock()
+            mock_native.return_value = mock_instance
+            strategy = HFStrategyFactory.create_strategy(
+                mock_workspace_manager, HFStrategyFactory.NATIVE_STRATEGY
+            )
+
+            mock_native.assert_called_once_with(mock_workspace_manager)
+            assert strategy == mock_instance
+
+    def test_set_strategy(self):
+        """Test setting strategy environment variable."""
+        HFStrategyFactory.set_strategy("tetra")
+        assert os.environ[HFStrategyFactory.STRATEGY_ENV_VAR] == "tetra"
+
+    def test_set_strategy_invalid(self):
+        """Test setting invalid strategy raises error."""
+        with pytest.raises(ValueError):
+            HFStrategyFactory.set_strategy("invalid_strategy")
+
+    def test_get_strategy_info(self):
+        """Test getting strategy information."""
+        os.environ[HFStrategyFactory.STRATEGY_ENV_VAR] = "tetra"
+        info = HFStrategyFactory.get_strategy_info()
+
+        assert info["current_strategy"] == "tetra"
+        assert info["environment_variable"] == HFStrategyFactory.STRATEGY_ENV_VAR
+        assert info["environment_value"] == "tetra"
+        assert info["default_strategy"] == HFStrategyFactory.DEFAULT_STRATEGY
+        assert "tetra" in info["available_strategies"]
+        assert "native" in info["available_strategies"]
+
+
+class TestTetraHFDownloader:
+    """Tests for the Tetra HF downloader strategy.
+
+    ``DownloadAccelerator`` is patched in ``src.hf_downloader_tetra`` for the
+    duration of every test, so the downloader under test never sees the real
+    accelerator class.
+    """
+
+    @patch("src.hf_downloader_tetra.DownloadAccelerator")
+    def test_init(self, accelerator_cls, mock_workspace_manager):
+        """The constructor stores the manager and builds one accelerator."""
+        downloader = TetraHFDownloader(mock_workspace_manager)
+
+        assert downloader.workspace_manager == mock_workspace_manager
+        accelerator_cls.assert_called_once_with(mock_workspace_manager)
+
+    @patch("src.hf_downloader_tetra.DownloadAccelerator")
+    def test_should_accelerate_with_hf_transfer(
+        self, accelerator_cls, mock_workspace_manager
+    ):
+        """Check should_accelerate results when hf_transfer is available."""
+        accelerator = Mock()
+        accelerator.hf_transfer_downloader.hf_transfer_available = True
+        accelerator_cls.return_value = accelerator
+
+        downloader = TetraHFDownloader(mock_workspace_manager)
+
+        # Should accelerate large models
+        for model_id in ("gpt-3.5-turbo", "llama"):
+            assert downloader.should_accelerate(model_id)
+
+        # Should not accelerate small models
+        assert not downloader.should_accelerate("prajjwal1/bert-tiny")
+
+    @patch("src.hf_downloader_tetra.DownloadAccelerator")
+    def test_should_accelerate_without_hf_transfer(
+        self, accelerator_cls, mock_workspace_manager
+    ):
+        """Check should_accelerate results when hf_transfer is unavailable."""
+        accelerator = Mock()
+        accelerator.hf_transfer_downloader.hf_transfer_available = False
+        accelerator_cls.return_value = accelerator
+
+        downloader = TetraHFDownloader(mock_workspace_manager)
+
+        # Should not accelerate any models without hf_transfer
+        for model_id in ("gpt-3.5-turbo", "llama"):
+            assert not downloader.should_accelerate(model_id)
+
+    @patch("src.hf_downloader_tetra.DownloadAccelerator")
+    @patch("src.hf_downloader_tetra.Path.mkdir")
+    def test_download_model_success(
+        self, mock_mkdir, accelerator_cls, mock_workspace_manager
+    ):
+        """A gpt2 download succeeds when the accelerator reports success."""
+        # mock_mkdir only keeps Path.mkdir stubbed while the download runs
+        accelerator = Mock()
+        accelerator.hf_transfer_downloader.hf_transfer_available = True
+        accelerator_cls.return_value = accelerator
+
+        downloader = TetraHFDownloader(mock_workspace_manager)
+
+        # Mock get_model_files to return test files
+        model_file = {
+            "path": "pytorch_model.bin",
+            "size": 100 * 1024 * 1024,
+            "url": "https://test.com/file",
+        }
+        downloader.get_model_files = Mock(return_value=[model_file])
+
+        # Mock download_with_fallback to succeed
+        download_ok = FunctionResponse(success=True)
+        accelerator.download_with_fallback.return_value = download_ok
+
+        result = downloader.download_model("gpt2")
+
+        assert result.success
+        assert "Successfully pre-downloaded" in result.stdout
+
+    @patch("src.hf_downloader_tetra.DownloadAccelerator")
+    def test_download_model_no_acceleration_needed(
+        self, accelerator_cls, mock_workspace_manager
+    ):
+        """A download that needs no acceleration still reports success."""
+        accelerator = Mock()
+        accelerator.hf_transfer_downloader.hf_transfer_available = False
+        accelerator_cls.return_value = accelerator
+
+        downloader = TetraHFDownloader(mock_workspace_manager)
+
+        result = downloader.download_model("prajjwal1/bert-tiny")
+
+        assert result.success
+        assert "does not require acceleration" in result.stdout
+
+
+class TestNativeHFDownloader:
+    """Tests for Native HF downloader strategy.
+
+    The gpt2 download tests patch ``snapshot_download`` inside
+    ``src.hf_downloader_native``; the other tests run without patches.
+    """
+
+    def test_init(self, mock_workspace_manager):
+        """The constructor stores the workspace manager it was given."""
+        downloader = NativeHFDownloader(mock_workspace_manager)
+        assert downloader.workspace_manager == mock_workspace_manager
+
+    def test_should_accelerate(self, mock_workspace_manager):
+        """Check should_accelerate results for large and small model names."""
+        downloader = NativeHFDownloader(mock_workspace_manager)
+
+        # Should accelerate large models
+        for model_id in ("gpt-3.5-turbo", "llama"):
+            assert downloader.should_accelerate(model_id)
+
+        # Should not accelerate small models
+        assert not downloader.should_accelerate("prajjwal1/bert-tiny")
+
+    def test_download_model_success(self, mock_workspace_manager):
+        """A gpt2 download succeeds when snapshot_download returns a path."""
+        with patch("src.hf_downloader_native.snapshot_download") as snapshot:
+            snapshot.return_value = "/cache/models/gpt2"
+
+            downloader = NativeHFDownloader(mock_workspace_manager)
+            result = downloader.download_model("gpt2")
+
+            assert result.success
+            assert "Successfully pre-cached model gpt2" in result.stdout
+            snapshot.assert_called_once_with(repo_id="gpt2", revision="main")
+
+    def test_download_model_failure(self, mock_workspace_manager):
+        """A gpt2 download fails when snapshot_download raises."""
+        with patch("src.hf_downloader_native.snapshot_download") as snapshot:
+            snapshot.side_effect = Exception("Download failed")
+
+            downloader = NativeHFDownloader(mock_workspace_manager)
+            result = downloader.download_model("gpt2")
+
+            assert not result.success
+            assert "Failed to pre-cache model gpt2" in result.error
+
+    def test_download_model_no_acceleration_needed(self, mock_workspace_manager):
+        """A model that needs no pre-caching still reports success."""
+        downloader = NativeHFDownloader(mock_workspace_manager)
+        result = downloader.download_model("prajjwal1/bert-tiny")
+
+        assert result.success
+        assert "does not require pre-caching" in result.stdout
diff --git a/tests/unit/test_remote_executor.py b/tests/unit/test_remote_executor.py
index 98e4fcd..928adcb 100644
--- a/tests/unit/test_remote_executor.py
+++ b/tests/unit/test_remote_executor.py
@@ -1,7 +1,7 @@
 import pytest
 import base64
 import cloudpickle
-from unittest.mock import Mock, patch
+from unittest.mock import Mock, patch, AsyncMock
 
 from remote_executor import RemoteExecutor
 from remote_execution import FunctionRequest
@@ -109,11 +109,15 @@ async def test_execute_function_with_dependencies_orchestration(self):
             self.executor.workspace_manager, "initialize_workspace"
         ) as mock_init:
             with patch.object(
-                self.executor.dependency_installer, "install_system_dependencies"
-            ) as mock_sys_deps:
+                self.executor.dependency_installer,
+                "install_system_dependencies_async",
+                new_callable=AsyncMock,
+            ) as mock_sys_deps_async:
                 with patch.object(
-                    self.executor.dependency_installer, "install_dependencies"
-                ) as mock_py_deps:
+                    self.executor.dependency_installer,
+                    "install_dependencies_async",
+                    new_callable=AsyncMock,
+                ) as mock_py_deps_async:
                     with patch.object(
                         self.executor.function_executor, "execute"
                     ) as mock_execute:
@@ -121,10 +125,14 @@ async def test_execute_function_with_dependencies_orchestration(self):
                         mock_init.return_value = Mock(
                             success=True, stdout="Workspace ready"
                         )
-                        mock_sys_deps.return_value = Mock(
+
+                        # Mock async methods with proper FunctionResponse returns
+                        from remote_execution import FunctionResponse
+
+                        mock_sys_deps_async.return_value = FunctionResponse(
                             success=True, stdout="System deps installed"
                         )
-                        mock_py_deps.return_value = Mock(
+                        mock_py_deps_async.return_value = FunctionResponse(
                             success=True, stdout="Python deps installed"
                         )
                         mock_execute.return_value = Mock(
@@ -134,8 +142,8 @@ async def test_execute_function_with_dependencies_orchestration(self):
                         await self.executor.ExecuteFunction(request)
 
                         # Verify all components were called in correct order
-                        mock_sys_deps.assert_called_once_with(["curl"])
-                        mock_py_deps.assert_called_once_with(["requests"])
+                        mock_sys_deps_async.assert_called_once_with(["curl"], True)
+                        mock_py_deps_async.assert_called_once_with(["requests"], True)
                         mock_execute.assert_called_once_with(request)
 
     @pytest.mark.asyncio
@@ -184,8 +192,10 @@ async def test_execute_function_dependency_failure_stops_execution(self):
             self.executor.workspace_manager, "initialize_workspace"
         ) as mock_init:
             with patch.object(
-                self.executor.dependency_installer, "install_dependencies"
-            ) as mock_py_deps:
+                self.executor.dependency_installer,
+                "install_dependencies_async",
+                new_callable=AsyncMock,
+            ) as mock_py_deps_async:
                 with patch.object(
                     self.executor.function_executor, "execute"
                 ) as mock_execute:
@@ -193,7 +203,11 @@ async def test_execute_function_dependency_failure_stops_execution(self):
                     mock_init.return_value = Mock(
                         success=True, stdout="Workspace ready"
                     )
-                    mock_py_deps.return_value = Mock(
+
+                    # Mock async method with FunctionResponse
+                    from remote_execution import FunctionResponse
+
+                    mock_py_deps_async.return_value = FunctionResponse(
                         success=False, error="Package not found"
                     )
 
@@ -211,8 +225,8 @@ def test_component_access_methods(self):
             self.executor.dependency_installer, "install_dependencies"
         ) as mock_install:
             mock_install.return_value = Mock(success=True)
-            self.executor.dependency_installer.install_dependencies(["test"])
-            mock_install.assert_called_once_with(["test"])
+            self.executor.dependency_installer.install_dependencies(["test"], True)
+            mock_install.assert_called_once_with(["test"], True)
 
         # Test workspace manager methods
         with patch.object(
diff --git a/tests/unit/test_workspace_manager.py b/tests/unit/test_workspace_manager.py
index 69dd8bb..701ba70 100644
--- a/tests/unit/test_workspace_manager.py
+++ b/tests/unit/test_workspace_manager.py
@@ -218,22 +218,14 @@ def test_configure_volume_environment(self, mock_exists, mock_makedirs):
                 os.environ.get("UV_CACHE_DIR")
                 == f"{RUNPOD_VOLUME_PATH}/{UV_CACHE_DIR_NAME}"
             )
-            # HF cache is shared at volume root
+            # HF cache is shared at volume root - HF manages subdirectories automatically
             assert (
                 os.environ.get("HF_HOME") == f"{RUNPOD_VOLUME_PATH}/{HF_CACHE_DIR_NAME}"
             )
-            assert (
-                os.environ.get("TRANSFORMERS_CACHE")
-                == f"{RUNPOD_VOLUME_PATH}/{HF_CACHE_DIR_NAME}/transformers"
-            )
-            assert (
-                os.environ.get("HF_DATASETS_CACHE")
-                == f"{RUNPOD_VOLUME_PATH}/{HF_CACHE_DIR_NAME}/datasets"
-            )
-            assert (
-                os.environ.get("HUGGINGFACE_HUB_CACHE")
-                == f"{RUNPOD_VOLUME_PATH}/{HF_CACHE_DIR_NAME}/hub"
-            )
+            # HF automatically creates and manages subdirectories, no need to set specific paths
+            assert "TRANSFORMERS_CACHE" not in os.environ
+            assert "HF_DATASETS_CACHE" not in os.environ
+            assert "HUGGINGFACE_HUB_CACHE" not in os.environ
             # Virtual environment is endpoint-specific
             expected_venv = (
                 f"{RUNPOD_VOLUME_PATH}/{RUNTIMES_DIR_NAME}/default/{VENV_DIR_NAME}"
diff --git a/tetra-rp b/tetra-rp
index 4bc6a8c..5322042 160000
--- a/tetra-rp
+++ b/tetra-rp
@@ -1 +1 @@
-Subproject commit 4bc6a8cfdd141b3ae00521f326d917098b9c2c3b
+Subproject commit 5322042111dab88eb093c27d6a9e894e7b0f605b
diff --git a/uv.lock b/uv.lock
index 19edc18..c46d141 100644
--- a/uv.lock
+++ b/uv.lock
@@ -846,6 +846,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/ee/45/b82e3c16be2182bff01179db177fe144d58b5dc787a7d4492c6ed8b9317f/frozenlist-1.7.0-py3-none-any.whl", hash = "sha256:9a5af342e34f7e97caf8c995864c7a396418ae2859cc6fdf1b1073020d516a7e", size = 13106 },
 ]
 
+[[package]]
+name = "fsspec"
+version = "2025.7.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/8b/02/0835e6ab9cfc03916fe3f78c0956cfcdb6ff2669ffa6651065d5ebf7fc98/fsspec-2025.7.0.tar.gz", hash = "sha256:786120687ffa54b8283d942929540d8bc5ccfa820deb555a2b5d0ed2b737bf58", size = 304432 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/2f/e0/014d5d9d7a4564cf1c40b5039bc882db69fd881111e03ab3657ac0b218e2/fsspec-2025.7.0-py3-none-any.whl", hash = "sha256:8b012e39f63c7d5f10474de957f3ab793b47b45ae7d39f2fb735f8bbe25c0e21", size = 199597 },
+]
+
 [[package]]
 name = "h11"
 version = "0.16.0"
@@ -855,6 +864,42 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515 },
 ]
 
+[[package]]
+name = "hf-transfer"
+version = "0.1.9"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/1a/eb/8fc64f40388c29ce8ce3b2b180a089d4d6b25b1d0d232d016704cb852104/hf_transfer-0.1.9.tar.gz", hash = "sha256:035572865dab29d17e783fbf1e84cf1cb24f3fcf8f1b17db1cfc7fdf139f02bf", size = 25201 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/81/f5/461d2e5f307e5048289b1168d5c642ae3bb2504e88dff1a38b92ed990a21/hf_transfer-0.1.9-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:e66acf91df4a8b72f60223059df3003062a5ae111757187ed1a06750a30e911b", size = 1393046 },
+    { url = "https://files.pythonhosted.org/packages/41/ba/8d9fd9f1083525edfcb389c93738c802f3559cb749324090d7109c8bf4c2/hf_transfer-0.1.9-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:8669dbcc7a3e2e8d61d42cd24da9c50d57770bd74b445c65123291ca842a7e7a", size = 1348126 },
+    { url = "https://files.pythonhosted.org/packages/8e/a2/cd7885bc9959421065a6fae0fe67b6c55becdeda4e69b873e52976f9a9f0/hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8fd0167c4407a3bc4cdd0307e65ada2294ec04f1813d8a69a5243e379b22e9d8", size = 3728604 },
+    { url = "https://files.pythonhosted.org/packages/f6/2e/a072cf196edfeda3310c9a5ade0a0fdd785e6154b3ce24fc738c818da2a7/hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ee8b10afedcb75f71091bcc197c526a6ebf5c58bbbadb34fdeee6160f55f619f", size = 3064995 },
+    { url = "https://files.pythonhosted.org/packages/c2/84/aec9ef4c0fab93c1ea2b1badff38c78b4b2f86f0555b26d2051dbc920cde/hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5828057e313de59300dd1abb489444bc452efe3f479d3c55b31a8f680936ba42", size = 3580908 },
+    { url = "https://files.pythonhosted.org/packages/29/63/b560d39651a56603d64f1a0212d0472a44cbd965db2fa62b99d99cb981bf/hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fc6bd19e1cc177c66bdef15ef8636ad3bde79d5a4f608c158021153b4573509d", size = 3400839 },
+    { url = "https://files.pythonhosted.org/packages/d6/d8/f87ea6f42456254b48915970ed98e993110521e9263472840174d32c880d/hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cdca9bfb89e6f8f281890cc61a8aff2d3cecaff7e1a4d275574d96ca70098557", size = 3552664 },
+    { url = "https://files.pythonhosted.org/packages/d6/56/1267c39b65fc8f4e2113b36297320f102718bf5799b544a6cbe22013aa1d/hf_transfer-0.1.9-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:89a23f58b7b7effbc047b8ca286f131b17728c99a9f972723323003ffd1bb916", size = 4073732 },
+    { url = "https://files.pythonhosted.org/packages/82/1a/9c748befbe3decf7cb415e34f8a0c3789a0a9c55910dea73d581e48c0ce5/hf_transfer-0.1.9-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:dc7fff1345980d6c0ebb92c811d24afa4b98b3e07ed070c8e38cc91fd80478c5", size = 3390096 },
+    { url = "https://files.pythonhosted.org/packages/72/85/4c03da147b6b4b7cb12e074d3d44eee28604a387ed0eaf7eaaead5069c57/hf_transfer-0.1.9-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:1a6bd16c667ebe89a069ca163060127a794fa3a3525292c900b8c8cc47985b0d", size = 3664743 },
+    { url = "https://files.pythonhosted.org/packages/e7/6e/e597b04f753f1b09e6893075d53a82a30c13855cbaa791402695b01e369f/hf_transfer-0.1.9-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:d2fde99d502093ade3ab1b53f80da18480e9902aa960dab7f74fb1b9e5bc5746", size = 3695243 },
+    { url = "https://files.pythonhosted.org/packages/09/89/d4e234727a26b2546c8fb70a276cd924260d60135f2165bf8b9ed67bb9a4/hf_transfer-0.1.9-cp38-abi3-win32.whl", hash = "sha256:435cc3cdc8524ce57b074032b8fd76eed70a4224d2091232fa6a8cef8fd6803e", size = 1086605 },
+    { url = "https://files.pythonhosted.org/packages/a1/14/f1e15b851d1c2af5b0b1a82bf8eb10bda2da62d98180220ba6fd8879bb5b/hf_transfer-0.1.9-cp38-abi3-win_amd64.whl", hash = "sha256:16f208fc678911c37e11aa7b586bc66a37d02e636208f18b6bc53d29b5df40ad", size = 1160240 },
+]
+
+[[package]]
+name = "hf-xet"
+version = "1.1.8"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/7a/49/91010b59debc7c862a5fd426d343134dd9a68778dbe570234b6495a4e204/hf_xet-1.1.8.tar.gz", hash = "sha256:62a0043e441753bbc446dcb5a3fe40a4d03f5fb9f13589ef1df9ab19252beb53", size = 484065 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/9c/91/5814db3a0d4a65fb6a87f0931ae28073b87f06307701fe66e7c41513bfb4/hf_xet-1.1.8-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:3d5f82e533fc51c7daad0f9b655d9c7811b5308e5890236828bd1dd3ed8fea74", size = 2752357 },
+    { url = "https://files.pythonhosted.org/packages/70/72/ce898516e97341a7a9d450609e130e108643389110261eaee6deb1ba8545/hf_xet-1.1.8-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:8e2dba5896bca3ab61d0bef4f01a1647004de59640701b37e37eaa57087bbd9d", size = 2613142 },
+    { url = "https://files.pythonhosted.org/packages/b7/d6/13af5f916cef795ac2b5e4cc1de31f2e0e375f4475d50799915835f301c2/hf_xet-1.1.8-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfe5700bc729be3d33d4e9a9b5cc17a951bf8c7ada7ba0c9198a6ab2053b7453", size = 3175859 },
+    { url = "https://files.pythonhosted.org/packages/4c/ed/34a193c9d1d72b7c3901b3b5153b1be9b2736b832692e1c3f167af537102/hf_xet-1.1.8-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:09e86514c3c4284ed8a57d6b0f3d089f9836a0af0a1ceb3c9dd664f1f3eaefef", size = 3074178 },
+    { url = "https://files.pythonhosted.org/packages/4a/1b/de6817b4bf65385280252dff5c9cceeedfbcb27ddb93923639323c1034a4/hf_xet-1.1.8-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:4a9b99ab721d385b83f4fc8ee4e0366b0b59dce03b5888a86029cc0ca634efbf", size = 3238122 },
+    { url = "https://files.pythonhosted.org/packages/b7/13/874c85c7ed519ec101deb654f06703d9e5e68d34416730f64c4755ada36a/hf_xet-1.1.8-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:25b9d43333bbef39aeae1616789ec329c21401a7fe30969d538791076227b591", size = 3344325 },
+    { url = "https://files.pythonhosted.org/packages/9e/d3/0aaf279f4f3dea58e99401b92c31c0f752924ba0e6c7d7bb07b1dbd7f35e/hf_xet-1.1.8-cp37-abi3-win_amd64.whl", hash = "sha256:4171f31d87b13da4af1ed86c98cf763292e4720c088b4957cf9d564f92904ca9", size = 2801689 },
+]
+
 [[package]]
 name = "httpcore"
 version = "1.0.9"
@@ -919,6 +964,25 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517 },
 ]
 
+[[package]]
+name = "huggingface-hub"
+version = "0.34.4"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "filelock" },
+    { name = "fsspec" },
+    { name = "hf-xet", marker = "platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" },
+    { name = "packaging" },
+    { name = "pyyaml" },
+    { name = "requests" },
+    { name = "tqdm" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/45/c9/bdbe19339f76d12985bc03572f330a01a93c04dffecaaea3061bdd7fb892/huggingface_hub-0.34.4.tar.gz", hash = "sha256:a4228daa6fb001be3f4f4bdaf9a0db00e1739235702848df00885c9b5742c85c", size = 459768 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/39/7b/bb06b061991107cd8783f300adff3e7b7f284e330fd82f507f2a1417b11d/huggingface_hub-0.34.4-py3-none-any.whl", hash = "sha256:9b365d781739c93ff90c359844221beef048403f1bc1f1c123c191257c3c890a", size = 561452 },
+]
+
 [[package]]
 name = "idna"
 version = "3.10"
@@ -2120,6 +2184,45 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/76/42/3efaf858001d2c2913de7f354563e3a3a2f0decae3efe98427125a8f441e/typer-0.16.0-py3-none-any.whl", hash = "sha256:1f79bed11d4d02d4310e3c1b7ba594183bcedb0ac73b27a9e5f28f6fb5b98855", size = 46317 },
 ]
 
+[[package]]
+name = "types-requests"
+version = "2.31.0.6"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "python_full_version < '3.10'",
+]
+dependencies = [
+    { name = "types-urllib3", marker = "python_full_version < '3.10'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/f9/b8/c1e8d39996b4929b918aba10dba5de07a8b3f4c8487bb61bb79882544e69/types-requests-2.31.0.6.tar.gz", hash = "sha256:cd74ce3b53c461f1228a9b783929ac73a666658f223e28ed29753771477b3bd0", size = 15535 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/5c/a1/6f8dc74d9069e790d604ddae70cb46dcbac668f1bb08136e7b0f2f5cd3bf/types_requests-2.31.0.6-py3-none-any.whl", hash = "sha256:a2db9cb228a81da8348b49ad6db3f5519452dd20a9c1e1a868c83c5fe88fd1a9", size = 14516 },
+]
+
+[[package]]
+name = "types-requests"
+version = "2.32.4.20250809"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "python_full_version >= '3.10'",
+]
+dependencies = [
+    { name = "urllib3", version = "2.5.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/ed/b0/9355adb86ec84d057fea765e4c49cce592aaf3d5117ce5609a95a7fc3dac/types_requests-2.32.4.20250809.tar.gz", hash = "sha256:d8060de1c8ee599311f56ff58010fb4902f462a1470802cf9f6ed27bc46c4df3", size = 23027 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/2b/6f/ec0012be842b1d888d46884ac5558fd62aeae1f0ec4f7a581433d890d4b5/types_requests-2.32.4.20250809-py3-none-any.whl", hash = "sha256:f73d1832fb519ece02c85b1f09d5f0dd3108938e7d47e7f94bbfa18a6782b163", size = 20644 },
+]
+
+[[package]]
+name = "types-urllib3"
+version = "1.26.25.14"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/73/de/b9d7a68ad39092368fb21dd6194b362b98a1daeea5dcfef5e1adb5031c7e/types-urllib3-1.26.25.14.tar.gz", hash = "sha256:229b7f577c951b8c1b92c1bc2b2fdb0b49847bd2af6d1cc2a2e3dd340f3bda8f", size = 11239 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/11/7b/3fc711b2efea5e85a7a0bbfe269ea944aa767bbba5ec52f9ee45d362ccf3/types_urllib3-1.26.25.14-py3-none-any.whl", hash = "sha256:9683bbb7fb72e32bfe9d2be6e04875fbe1b3eeec3cbb4ea231435aa7fd6b4f0e", size = 15377 },
+]
+
 [[package]]
 name = "typing-extensions"
 version = "4.14.1"
@@ -2470,7 +2573,10 @@ version = "0.4.1"
 source = { virtual = "." }
 dependencies = [
     { name = "cloudpickle" },
+    { name = "hf-transfer" },
+    { name = "huggingface-hub" },
     { name = "pydantic" },
+    { name = "requests" },
     { name = "runpod" },
 ]
 
@@ -2482,12 +2588,17 @@ dev = [
     { name = "pytest-cov" },
     { name = "pytest-mock" },
     { name = "ruff" },
+    { name = "types-requests", version = "2.31.0.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
+    { name = "types-requests", version = "2.32.4.20250809", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" },
 ]
 
 [package.metadata]
 requires-dist = [
     { name = "cloudpickle", specifier = ">=3.1.1" },
+    { name = "hf-transfer", specifier = ">=0.1.0" },
+    { name = "huggingface-hub", specifier = ">=0.32.0" },
     { name = "pydantic", specifier = ">=2.11.4" },
+    { name = "requests", specifier = ">=2.25.0" },
     { name = "runpod" },
 ]
 
@@ -2499,6 +2610,7 @@ dev = [
     { name = "pytest-cov", specifier = ">=6.0.0" },
     { name = "pytest-mock", specifier = ">=3.14.0" },
     { name = "ruff", specifier = ">=0.8.0" },
+    { name = "types-requests", specifier = ">=2.25.0" },
 ]
 
 [[package]]