exo-explore · cadenmackenzie · Nov 13, 2024 · Nov 13, 2024 · Nov 13, 2024 · Nov 13, 2024
diff --git a/exo/api/chatgpt_api.py b/exo/api/chatgpt_api.py
@@ -2,6 +2,7 @@
 import time
 import asyncio
 import json
+import os
 from pathlib import Path
 from transformers import AutoTokenizer
 from typing import List, Literal, Union, Dict
@@ -15,8 +16,9 @@
 from exo.helpers import PrefixDict, shutdown
 from exo.inference.tokenizers import resolve_tokenizer
 from exo.orchestration import Node
-from exo.models import build_base_shard, model_cards, get_repo, pretty_name, get_supported_models
+from exo.models import build_base_shard, model_cards, get_repo, pretty_name
 from typing import Callable, Optional
+from exo.download.hf.hf_shard_download import HFShardDownloader
 
 class Message:
   def __init__(self, role: str, content: Union[str, List[Dict[str, Union[str, Dict[str, str]]]]]):
@@ -213,13 +215,52 @@ async def handle_healthcheck(self, request):
     return web.json_response({"status": "ok"})
 
   async def handle_model_support(self, request):
-    return web.json_response({
-      "model pool": {
-        model_name: pretty_name.get(model_name, model_name) 
-        for model_name in get_supported_models(self.node.topology_inference_engines_pool)
-      }
-    })
-
+    """Return JSON listing each supported model with its local download progress.
+
+    A model is listed only if its card has a repo for every engine in the node's
+    topology pool plus this node's own engine; any exception yields HTTP 500.
+    """
+    try:
+      # The required engines do not depend on the model, so build them once.
+      # dict.fromkeys de-duplicates while keeping first-seen order.
+      pool_engines = [name for names in self.node.topology_inference_engines_pool for name in names if name is not None]
+      required_engines = list(dict.fromkeys(pool_engines + [self.inference_engine_classname]))
+      model_pool = {}
+
+      for model_name, pretty in pretty_name.items():
+        if model_name not in model_cards:
+          continue
+        model_info = model_cards[model_name]
+        if not all(engine in model_info["repo"] for engine in required_engines):
+          continue
+        shard = build_base_shard(model_name, self.inference_engine_classname)
+        if not shard:
+          continue
+
+        # quick_check=True is meant to keep this probe from starting a download (not verifiable from here)
+        downloader = HFShardDownloader(quick_check=True)
+        downloader.current_shard = shard
+        downloader.current_repo_id = get_repo(shard.model_id, self.inference_engine_classname)
+        status = await downloader.get_shard_download_status()
+        download_percentage = status.get("overall") if status else None
+        if DEBUG >= 2:
+          print(f"Download status for {model_name}: {status}")
+          if download_percentage is not None:
+            print(f"Overall download percentage for {model_name}: {download_percentage}")
+
+        model_pool[model_name] = {
+          "name": pretty,
+          # None == 100 is False, so a missing status reads as not downloaded
+          "downloaded": download_percentage == 100,
+          "download_percentage": download_percentage,
+        }
+
+      return web.json_response({"model pool": model_pool})
+    except Exception as e:
+      print(f"Error in handle_model_support: {str(e)}")
+      traceback.print_exc()
+      return web.json_response({"detail": f"Server error: {str(e)}"}, status=500)
+
   async def handle_get_models(self, request):
     return web.json_response([{"id": model_name, "object": "model", "owned_by": "exo", "ready": True} for model_name, _ in model_cards.items()])
 

diff --git a/exo/download/hf/hf_helpers.py b/exo/download/hf/hf_helpers.py
@@ -163,10 +163,18 @@ async def download_file(
     downloaded_size = local_file_size
     downloaded_this_session = 0
     mode = 'ab' if use_range_request else 'wb'
-    if downloaded_size == total_size:
+    percentage = await get_file_download_percentage(
+      session,
+      repo_id,
+      revision,
+      file_path,
+      Path(save_directory)
+    )
+
+    if percentage == 100:
       if DEBUG >= 2: print(f"File already downloaded: {file_path}")
       if progress_callback:
-        await progress_callback(RepoFileProgressEvent(repo_id, revision, file_path, downloaded_size, downloaded_this_session, total_size, 0, timedelta(0), "complete"))
+        await progress_callback(RepoFileProgressEvent(repo_id, revision, file_path, total_size, 0, total_size, 0, timedelta(0), "complete"))
       return
 
     if response.status == 200:
@@ -429,6 +437,57 @@ def get_allow_patterns(weight_map: Dict[str, str], shard: Shard) -> List[str]:
   if DEBUG >= 2: print(f"get_allow_patterns {weight_map=} {shard=} {shard_specific_patterns=}")
   return list(default_patterns | shard_specific_patterns)
 
+async def get_file_download_percentage(
+    session: aiohttp.ClientSession,
+    repo_id: str,
+    revision: str,
+    file_path: str,
+    snapshot_dir: Path,
+) -> float:
+  """Return how much of a repo file is present locally, as a percentage.
+
+  Compares the size of `snapshot_dir / file_path` with the Content-Length of a
+  HEAD request against the file's resolve URL.
+
+  Args:
+    session: aiohttp session used for the HEAD request.
+    repo_id: repository id, used to build the resolve URL.
+    revision: revision segment of the resolve URL.
+    file_path: path of the file, relative to both the repo and `snapshot_dir`.
+    snapshot_dir: local directory that holds the downloaded files.
+
+  Returns:
+    100.0 only when local and remote sizes are equal; 0.0 when the local file is
+    missing or empty, the remote size is unavailable, or any error occurs;
+    otherwise local_size / remote_size * 100. This can exceed 100 if the local
+    file is larger than the remote one.
+  """
+  try:
+    local_path = snapshot_dir / file_path
+    if not await aios.path.exists(local_path):
+      return 0.0
+    local_size = await aios.path.getsize(local_path)
+    if local_size == 0:
+      return 0.0
+
+    url = urljoin(f"{get_hf_endpoint()}/{repo_id}/resolve/{revision}/", file_path)
+    # Ask for the unencoded representation: aiohttp advertises gzip/deflate by
+    # default, and a compressed Content-Length would never match the on-disk size.
+    headers = {**(await get_auth_headers() or {}), "Accept-Encoding": "identity"}
+    async with session.head(url, headers=headers, allow_redirects=True) as response:
+      if response.status != 200:
+        if DEBUG >= 2: print(f"Failed to get remote file info for {file_path}: {response.status}")
+        return 0.0
+      remote_size = int(response.headers.get('Content-Length', 0))
+    if remote_size == 0:
+      if DEBUG >= 2: print(f"Remote size is 0 for {file_path}")
+      return 0.0
+    return 100.0 if local_size == remote_size else (local_size / remote_size) * 100
+  except Exception as e:
+    # Best effort: any failure is reported as "nothing downloaded"
+    if DEBUG >= 2: print(f"Error checking file download status for {file_path}: {e}")
+    return 0.0
+
 async def has_hf_home_read_access() -> bool:
   hf_home = get_hf_home()
   try: return await aios.access(hf_home, os.R_OK)
@@ -438,3 +497,4 @@ async def has_hf_home_write_access() -> bool:
   hf_home = get_hf_home()
   try: return await aios.access(hf_home, os.W_OK)
   except OSError: return False
+
diff --git a/exo/download/hf/hf_shard_download.py b/exo/download/hf/hf_shard_download.py
@@ -1,13 +1,20 @@
 import asyncio
 import traceback
 from pathlib import Path
-from typing import Dict, List, Tuple
+from typing import Dict, List, Tuple, Optional
 from exo.inference.shard import Shard
 from exo.download.shard_download import ShardDownloader
 from exo.download.download_progress import RepoProgressEvent
-from exo.download.hf.hf_helpers import download_repo_files, RepoProgressEvent, get_weight_map, get_allow_patterns, get_repo_root
+from exo.download.hf.hf_helpers import (
+    download_repo_files, RepoProgressEvent, get_weight_map, 
+    get_allow_patterns, get_repo_root, fetch_file_list, 
+    get_local_snapshot_dir, get_file_download_percentage,
+    filter_repo_objects
+)
 from exo.helpers import AsyncCallbackSystem, DEBUG
 from exo.models import model_cards, get_repo
+import aiohttp
+from aiofiles import os as aios
 
 
 class HFShardDownloader(ShardDownloader):
@@ -17,8 +24,13 @@ def __init__(self, quick_check: bool = False, max_parallel_downloads: int = 4):
     self.active_downloads: Dict[Shard, asyncio.Task] = {}
     self.completed_downloads: Dict[Shard, Path] = {}
     self._on_progress = AsyncCallbackSystem[str, Tuple[Shard, RepoProgressEvent]]()
+    self.current_shard: Optional[Shard] = None
+    self.current_repo_id: Optional[str] = None
+    self.revision: str = "main"
 
   async def ensure_shard(self, shard: Shard, inference_engine_name: str) -> Path:
+    self.current_shard = shard
+    self.current_repo_id = get_repo(shard.model_id, inference_engine_name)
     repo_name = get_repo(shard.model_id, inference_engine_name)
     if shard in self.completed_downloads:
       return self.completed_downloads[shard]
@@ -77,3 +89,73 @@ async def wrapped_progress_callback(event: RepoProgressEvent):
   @property
   def on_progress(self) -> AsyncCallbackSystem[str, Tuple[Shard, RepoProgressEvent]]:
     return self._on_progress
+
+  async def get_shard_download_status(self) -> Optional[Dict[str, float]]:
+    """Report per-file and overall download progress for `self.current_shard`.
+
+    `current_shard` and `current_repo_id` must be set beforehand (`ensure_shard`
+    sets both). Every file of the repo that matches the shard's allow patterns
+    is checked through `get_file_download_percentage`.
+
+    Returns:
+      A dict mapping each matching repo file path to its download percentage,
+      plus an "overall" entry weighted by file size. None when no shard or
+      repo id is set, no local snapshot directory or weight map is found, or
+      an exception is raised along the way.
+    """
+    if not (self.current_shard and self.current_repo_id):
+      if DEBUG >= 2:
+        print(f"No current shard or repo_id set: {self.current_shard=} {self.current_repo_id=}")
+      return None
+
+    try:
+      local_dir = await get_local_snapshot_dir(self.current_repo_id, self.revision)
+      if not local_dir:
+        # Nothing on disk yet, so there is no point in querying the remote
+        if DEBUG >= 2:
+          print(f"No snapshot directory found for {self.current_repo_id}")
+        return None
+
+      weights = await get_weight_map(self.current_repo_id, self.revision)
+      if not weights:
+        if DEBUG >= 2:
+          print(f"No weight map found for {self.current_repo_id}")
+        return None
+
+      # Allow patterns derived from the weight map for this shard
+      wanted = get_allow_patterns(weights, self.current_shard)
+      progress: Dict[str, float] = {}
+      size_sum = 0
+      done_sum = 0
+
+      async with aiohttp.ClientSession() as session:
+        listing = await fetch_file_list(session, self.current_repo_id, self.revision)
+        matches = filter_repo_objects(listing, allow_patterns=wanted, key=lambda entry: entry["path"])
+        needed = list(matches)
+
+        for entry in needed:
+          size = entry["size"]
+          size_sum += size
+          pct = await get_file_download_percentage(
+            session, self.current_repo_id, self.revision, entry["path"], local_dir
+          )
+          progress[entry["path"]] = pct
+          done_sum += size * (pct / 100)
+
+        # Overall figure is weighted by file size; an empty file set counts as 0
+        progress["overall"] = (done_sum / size_sum) * 100 if size_sum > 0 else 0
+
+        # Debug-only breakdown of the figures computed above
+        if DEBUG >= 2:
+          print(f"Download calculation for {self.current_repo_id}:")
+          print(f"Total bytes: {size_sum}")
+          print(f"Downloaded bytes: {done_sum}")
+          for entry in needed:
+            print(f"File {entry['path']}: size={entry['size']}, percentage={progress[entry['path']]}")
+
+      return progress
+    except Exception as e:
+      if DEBUG >= 2:
+        print(f"Error getting shard download status: {e}")
+        traceback.print_exc()
+      return None
diff --git a/exo/download/shard_download.py b/exo/download/shard_download.py
@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from typing import Optional, Tuple
+from typing import Optional, Tuple, Dict
 from pathlib import Path
 from exo.inference.shard import Shard
 from exo.download.download_progress import RepoProgressEvent
@@ -26,6 +26,16 @@ async def ensure_shard(self, shard: Shard, inference_engine_name: str) -> Path:
   def on_progress(self) -> AsyncCallbackSystem[str, Tuple[Shard, RepoProgressEvent]]:
     pass
 
+  @abstractmethod
+  async def get_shard_download_status(self) -> Optional[Dict[str, float]]:
+    """Get the download status of the downloader's current shard.
+
+    Returns:
+        Optional[Dict[str, float]]: download percentages keyed by name (HFShardDownloader uses
+        repo file paths plus an "overall" entry), or None if status cannot be determined
+    """
+    pass
+
 
 class NoopShardDownloader(ShardDownloader):
   async def ensure_shard(self, shard: Shard, inference_engine_name: str) -> Path:

diff --git a/exo/tinychat/index.html b/exo/tinychat/index.html
@@ -26,13 +26,13 @@
 <body>
 <main x-data="state" x-init="console.log(endpoint)">
      <!-- Error Toast -->
-    <div x-show="errorMessage" x-transition.opacity class="toast">
+    <div x-show="errorMessage !== null" x-transition.opacity class="toast">
         <div class="toast-header">
-            <span class="toast-error-message" x-text="errorMessage.basic"></span>
+            <span class="toast-error-message" x-text="errorMessage?.basic || ''"></span>
             <div class="toast-header-buttons">
                 <button @click="errorExpanded = !errorExpanded; if (errorTimeout) { clearTimeout(errorTimeout); errorTimeout = null; }" 
                         class="toast-expand-button" 
-                        x-show="errorMessage.stack">
+                        x-show="errorMessage?.stack">
                     <span x-text="errorExpanded ? 'Hide Details' : 'Show Details'"></span>
                 </button>
                 <button @click="errorMessage = null; errorExpanded = false;" class="toast-close-button">
@@ -41,11 +41,11 @@
             </div>
         </div>
         <div class="toast-content" x-show="errorExpanded" x-transition>
-            <span x-text="errorMessage.stack"></span>
+            <span x-text="errorMessage?.stack || ''"></span>
         </div>
     </div>
 <div class="model-selector">
-  <select @change="if (cstate) cstate.selectedModel = $event.target.value" x-model="cstate.selectedModel" x-init="await populateSelector()" class='model-select'>
+  <select @change="if (cstate) cstate.selectedModel = $event.target.value" x-model="cstate.selectedModel" class='model-select'>
   </select>
 </div>
 <div @popstate.window="