Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions src/aleph/vm/orchestrator/views/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,7 @@
VmSetupError,
MicroVMFailedInitError,
HostNotFoundError,
HTTPNotFound,
)

scheduling_errors: dict[ItemHash, Exception] = {}
Expand All @@ -414,8 +415,12 @@
vm_hash = ItemHash(vm_hash)
await start_persistent_vm(vm_hash, pubsub, pool)
except vm_creation_exceptions as error:
logger.exception(error)
logger.exception("Error while starting VM '%s': %s", vm_hash, error)

Check warning on line 418 in src/aleph/vm/orchestrator/views/__init__.py

View check run for this annotation

Codecov / codecov/patch

src/aleph/vm/orchestrator/views/__init__.py#L418

Added line #L418 was not covered by tests
scheduling_errors[vm_hash] = error
except Exception as error:

Check warning on line 420 in src/aleph/vm/orchestrator/views/__init__.py

View check run for this annotation

Codecov / codecov/patch

src/aleph/vm/orchestrator/views/__init__.py#L420

Added line #L420 was not covered by tests
# Handle unknown exception separately, to avoid leaking data
logger.exception("Unhandled Error while starting VM '%s': %s", vm_hash, error)
scheduling_errors[vm_hash] = Exception("Unhandled Error")

Check warning on line 423 in src/aleph/vm/orchestrator/views/__init__.py

View check run for this annotation

Codecov / codecov/patch

src/aleph/vm/orchestrator/views/__init__.py#L422-L423

Added lines #L422 - L423 were not covered by tests

# Schedule the start of instances:
for instance_hash in allocation.instances:
Expand All @@ -424,8 +429,12 @@
try:
await start_persistent_vm(instance_item_hash, pubsub, pool)
except vm_creation_exceptions as error:
logger.exception(error)
logger.exception("Error while starting VM '%s': %s", instance_hash, error)

Check warning on line 432 in src/aleph/vm/orchestrator/views/__init__.py

View check run for this annotation

Codecov / codecov/patch

src/aleph/vm/orchestrator/views/__init__.py#L432

Added line #L432 was not covered by tests
scheduling_errors[instance_item_hash] = error
except Exception as error:

Check warning on line 434 in src/aleph/vm/orchestrator/views/__init__.py

View check run for this annotation

Codecov / codecov/patch

src/aleph/vm/orchestrator/views/__init__.py#L434

Added line #L434 was not covered by tests
# Handle unknown exception separately, to avoid leaking data
logger.exception("Unhandled Error while starting VM '%s': %s", instance_hash, error)
scheduling_errors[vm_hash] = Exception("Unhandled Error")

Check warning on line 437 in src/aleph/vm/orchestrator/views/__init__.py

View check run for this annotation

Codecov / codecov/patch

src/aleph/vm/orchestrator/views/__init__.py#L436-L437

Added lines #L436 - L437 were not covered by tests

# Log unsupported features
if allocation.on_demand_vms:
Expand Down
6 changes: 5 additions & 1 deletion src/aleph/vm/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,12 @@
await asyncio.wait_for(file_downloaded_by_another_task(local_path), timeout=30)
except TimeoutError as error:
if attempt < (download_attempts - 1):
logger.warning(f"Download failed, retrying attempt {attempt + 1}/{download_attempts}...")
logger.warning(

Check warning on line 113 in src/aleph/vm/storage.py

View check run for this annotation

Codecov / codecov/patch

src/aleph/vm/storage.py#L113

Added line #L113 was not covered by tests
f"Download failed (waiting for another task), retrying attempt {attempt + 1}/{download_attempts}..."
)
continue
else:
logger.warning(f"Download of {url} failed (waiting for another task), aborting...")

Check warning on line 118 in src/aleph/vm/storage.py

View check run for this annotation

Codecov / codecov/patch

src/aleph/vm/storage.py#L118

Added line #L118 was not covered by tests
raise error from file_exists_error
except (
aiohttp.ClientConnectionError,
Expand All @@ -123,6 +126,7 @@
logger.warning(f"Download failed, retrying attempt {attempt + 1}/{download_attempts}...")
# continue # continue inside try/finally block is unimplemented in `mypyc`
else:
logger.warning(f"Download of {url} failed, aborting...")

Check warning on line 129 in src/aleph/vm/storage.py

View check run for this annotation

Codecov / codecov/patch

src/aleph/vm/storage.py#L129

Added line #L129 was not covered by tests
raise error
finally:
# Ensure no partial file is left behind
Expand Down
Loading