diff --git a/src/aleph/vm/orchestrator/views/__init__.py b/src/aleph/vm/orchestrator/views/__init__.py
index 94209c59f..b6a45c0d7 100644
--- a/src/aleph/vm/orchestrator/views/__init__.py
+++ b/src/aleph/vm/orchestrator/views/__init__.py
@@ -403,6 +403,7 @@ async def update_allocations(request: web.Request):
         VmSetupError,
         MicroVMFailedInitError,
         HostNotFoundError,
+        HTTPNotFound,
     )
 
     scheduling_errors: dict[ItemHash, Exception] = {}
@@ -414,8 +415,12 @@ async def update_allocations(request: web.Request):
             vm_hash = ItemHash(vm_hash)
             await start_persistent_vm(vm_hash, pubsub, pool)
         except vm_creation_exceptions as error:
-            logger.exception(error)
+            logger.exception("Error while starting VM '%s': %s", vm_hash, error)
             scheduling_errors[vm_hash] = error
+        except Exception as error:
+            # Handle unknown exception separately, to avoid leaking data
+            logger.exception("Unhandled Error while starting VM '%s': %s", vm_hash, error)
+            scheduling_errors[vm_hash] = Exception("Unhandled Error")
 
     # Schedule the start of instances:
     for instance_hash in allocation.instances:
@@ -424,8 +429,12 @@ async def update_allocations(request: web.Request):
         try:
             await start_persistent_vm(instance_item_hash, pubsub, pool)
         except vm_creation_exceptions as error:
-            logger.exception(error)
+            logger.exception("Error while starting VM '%s': %s", instance_hash, error)
             scheduling_errors[instance_item_hash] = error
+        except Exception as error:
+            # Handle unknown exception separately, to avoid leaking data
+            logger.exception("Unhandled Error while starting VM '%s': %s", instance_hash, error)
+            scheduling_errors[instance_item_hash] = Exception("Unhandled Error")
 
     # Log unsupported features
     if allocation.on_demand_vms:
diff --git a/src/aleph/vm/storage.py b/src/aleph/vm/storage.py
index df0505ca7..15d3b9384 100644
--- a/src/aleph/vm/storage.py
+++ b/src/aleph/vm/storage.py
@@ -110,9 +110,12 @@ async def download_file(url: str, local_path: Path) -> None:
                 await asyncio.wait_for(file_downloaded_by_another_task(local_path), timeout=30)
             except TimeoutError as error:
                 if attempt < (download_attempts - 1):
-                    logger.warning(f"Download failed, retrying attempt {attempt + 1}/{download_attempts}...")
+                    logger.warning(
+                        f"Download failed (waiting for another task), retrying attempt {attempt + 1}/{download_attempts}..."
+                    )
                     continue
                 else:
+                    logger.warning(f"Download of {url} failed (waiting for another task), aborting...")
                     raise error from file_exists_error
         except (
             aiohttp.ClientConnectionError,
@@ -123,6 +126,7 @@ async def download_file(url: str, local_path: Path) -> None:
                 logger.warning(f"Download failed, retrying attempt {attempt + 1}/{download_attempts}...")
                 # continue # continue inside try/finally block is unimplemented in `mypyc`
             else:
+                logger.warning(f"Download of {url} failed, aborting...")
                 raise error
         finally:
             # Ensure no partial file is left behind