Skip to content
17 changes: 15 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ Unless your focus is developing the VM-Connector, using the Docker image is easi
```

2. **Install the Debian Package**
This will also install required some dep and config file, making the setup easier.

Replace `1.2.0` with the latest release version.

**On Debian 12 (Bookworm)**:
Expand All @@ -94,16 +96,27 @@ Unless your focus is developing the VM-Connector, using the Docker image is easi
To prevent conflicts, deactivate the system version of aleph-vm by disabling its `systemd` service.

```shell
sudo systemctl stop aleph-vm-supervisor
sudo systemctl disable aleph-vm-supervisor.service
```

4. **Clone the Repository and Set Up a Virtual Environment**
- Clone the aleph-vm repository to your development environment.
- Create a virtual environment to manage dependencies.
- Install the system dependencies required to build the python venv and packages

Inside the virtual environment, run:
```shell
apt install libdbus-1-dev libglib2.0-dev libsystemd-dev python3.12-venv
```

- Create a virtual environment to manage dependencies. It need to be a system site package as we use some dependencies provided via debian packages. Such as network interfaces
```shell
python3 -m venv --system-site-packages ~/.virtualenvs/aleph-vm
```

Active the virtual environment and install the deps:

```shell
source ~/.virtualenvs/aleph-vm/bin/activate
pip install -e .
```

Expand Down
2 changes: 1 addition & 1 deletion packaging/aleph-vm/etc/haproxy/haproxy-aleph.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ backend bk_http
# For HTTP - Use Host header
http-request set-header Host %[req.hdr(host)]
use-server %[var(txn.host),lower,map(/etc/haproxy/http_domains.map)] if { var(txn.host) -m found }
server web1 127.0.0.1:4020
server fallback_local 127.0.0.1:4020

# Default to fallback to the aleph-vm supervisor
# Backend to terminate TLS for fallback (uses internal http frontend)
Expand Down
13 changes: 10 additions & 3 deletions src/aleph/vm/haproxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,9 @@ def update_haproxy_backends(socket_path, backend_name, map_file_path, weight=1):

# Remove servers that are not in the map file
servers_to_remove = set(current_servers) - processed_servers
# Do not remove the fallback server
servers_to_remove.discard("fallback_local")
servers_to_remove.discard("web1")
if servers_to_remove:
logger.info(f"Removing {len(servers_to_remove)} servers no longer in map file")
for server_name in servers_to_remove:
Expand All @@ -283,9 +286,10 @@ def update_haproxy_backends(socket_path, backend_name, map_file_path, weight=1):
return True


async def fetch_list() -> list[dict]:
async def fetch_list(domain=None) -> list[dict]:
async with aiohttp.ClientSession() as client:
resp = await client.get(url=str(settings.DOMAIN_SERVICE_URL))
resp = await client.get(url=str(settings.DOMAIN_SERVICE_URL), params={"crn": domain} if domain else None)
# print(resp.real_url)
resp.raise_for_status()
instances = await resp.json()
if len(instances) == 0:
Expand All @@ -294,7 +298,10 @@ async def fetch_list() -> list[dict]:


async def fetch_list_and_update(socket_path, local_vms: list[str], force_update):
instances = await fetch_list()
if settings.DOMAIN_NAME in ("localhost", "vm.example.org"):
logger.info("Skipping domain update because DOMAIN_NAME is not set")

instances = await fetch_list(settings.DOMAIN_NAME)
# filter on local hash
instances = [i for i in instances if i["item_hash"] in local_vms]
# This should match the config in haproxy.cfg
Expand Down
9 changes: 8 additions & 1 deletion src/aleph/vm/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@ def to_dict(self):
return self.__dict__


LAST_ASSIGNED_HOST_PORT = 24000


class VmExecution:
"""
Control the execution of a VM on a high level.
Expand Down Expand Up @@ -136,7 +139,11 @@ async def update_port_redirects(self, requested_ports: dict[int, dict[str, bool]

for vm_port in redirect_to_add:
target = requested_ports[vm_port]
host_port = get_available_host_port(start_port=24000)
host_port = get_available_host_port(start_port=LAST_ASSIGNED_HOST_PORT)
LAST_ASSIGNED_HOST_PORT = host_port
if LAST_ASSIGNED_HOST_PORT > 65535:
LAST_ASSIGNED_HOST_PORT = 24000

for protocol in SUPPORTED_PROTOCOL_FOR_REDIRECT:
if target[protocol]:
add_port_redirect_rule(self.vm.vm_id, interface, host_port, vm_port, protocol)
Expand Down
5 changes: 2 additions & 3 deletions src/aleph/vm/orchestrator/machine.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import asyncio
import json
import re
import shutil
Expand All @@ -17,9 +16,9 @@ async def get_hardware_info():
hw_info = {"cpu": None, "memory": None}

for hw in data["children"][0]["children"]:
if hw["id"] == "cpu" or hw["id"].startswith("cpu"):
if hw["id"] == "cpu" or hw["id"].startswith("cpu") and not hw.get("disabled"):
hw_info["cpu"] = hw
elif hw["class"] == "memory" and hw["id"] == "memory":
elif hw["class"] == "memory" and hw["id"] == "memory" and not hw.get("disabled"):
hw_info["memory"] = hw

return hw_info
Expand Down
3 changes: 3 additions & 0 deletions src/aleph/vm/orchestrator/supervisor.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
about_execution_records,
about_executions,
about_login,
debug_haproxy,
list_executions,
list_executions_v2,
notify_allocation,
Expand Down Expand Up @@ -85,6 +86,7 @@ async def error_middleware(request, handler) -> web.Response:
status = exc.status
return web.json_response({"error": message}, status=status)
except Exception as exc:
logger.exception("Unhandled exception for %s", request.path)
message = str(exc)
status = 500
return web.json_response({"error": message, "error_type": str(type(exc))}, status=status)
Expand Down Expand Up @@ -145,6 +147,7 @@ def setup_webapp(pool: VmPool | None):
web.post("/control/machine/{ref}/confidential/initialize", operate_confidential_initialize),
web.get("/control/machine/{ref}/confidential/measurement", operate_confidential_measurement),
web.post("/control/machine/{ref}/confidential/inject_secret", operate_confidential_inject_secret),
web.get("/debug/haproxy", debug_haproxy),
# /status APIs are used to check that the VM Orchestrator is running properly
web.get("/status/check/fastapi", status_check_fastapi),
web.get("/status/check/fastapi/legacy", status_check_fastapi_legacy),
Expand Down
21 changes: 21 additions & 0 deletions src/aleph/vm/orchestrator/views/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from aleph_message.models import InstanceContent, ItemHash, MessageType, PaymentType
from pydantic import ValidationError

from aleph.vm import haproxy
from aleph.vm.conf import settings
from aleph.vm.controllers.firecracker.executable import (
ResourceDownloadError,
Expand Down Expand Up @@ -148,6 +149,26 @@ async def about_login(request: web.Request) -> web.Response:
return web.json_response({"success": False}, status=401)


async def debug_haproxy(request: web.Request) -> web.Response:
""" "Debug endpoint to check the status of HAProxy and the domains mapped to it.

This is a debug endpoint and should not be used in production. The interface is subject to change.
"""
socket = settings.HAPROXY_SOCKET
import pathlib

if not pathlib.Path(socket).exists():
logger.info("HAProxy not running? socket not found, skip domain mapping update")
return web.json_response({"status": "no socket"}, status=http.HTTPStatus)
r: dict = {"status": "ok", "backends": {}}
for backend in haproxy.HAPROXY_BACKENDS:
r["backends"][str(backend["name"])] = haproxy.get_current_backends(socket, backend["name"])
return web.json_response(
r,
dumps=dumps_for_json,
)


@cors_allow_all
async def about_executions(request: web.Request) -> web.Response:
"/about/executions/details Debugging endpoint with full execution details."
Expand Down
1 change: 1 addition & 0 deletions src/aleph/vm/pool.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,7 @@ async def load_persistent_executions(self):
await execution.fetch_port_redirect_config_and_setup()

self.executions[vm_hash] = execution
execution.record = saved_execution
else:
execution.uuid = saved_execution.uuid
await execution.record_usage()
Expand Down
179 changes: 179 additions & 0 deletions tests/supervisor/test_haproxy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
import pytest

from aleph.vm import haproxy

# Sample response for https://api.dns.public.aleph.sh/instances/list
sample_domain_instance_list = [
{
"name": "api-dev.thronetools.com",
"item_hash": "747b52c712e16642b498f16c4c6e68d5fb00ddbaf8d2a0dc7bd298d33abb9124",
"ipv6": "2a01:240:ad00:2502:3:747b:52c7:12e1",
"ipv4": {"public": "46.247.131.211", "local": "172.16.15.1/32"},
},
{
"name": "centurion.cybernetwork.me",
"item_hash": "cefb9373558927d70365746900a410f01e1340ecff0dda93deb672f55bb70ac8",
"ipv6": "2a01:240:ad00:2502:3:cefb:9373:5581",
"ipv4": {"public": "46.247.131.211", "local": "172.16.52.1/32"},
},
{
"name": "cms-dev.thronetools.com",
"item_hash": "747b52c712e16642b498f16c4c6e68d5fb00ddbaf8d2a0dc7bd298d33abb9124",
"ipv6": "2a01:240:ad00:2502:3:747b:52c7:12e1",
"ipv4": {"public": "46.247.131.211", "local": "172.16.15.1/32"},
},
{
"name": "platform-api.3mera.dev",
"item_hash": "d78e81d99e7468302bdaf82b5ca338b486629cf813384bdc3282e2b8fa7f478f",
"ipv6": "2a01:240:ad00:2502:3:d78e:81d9:9e71",
"ipv4": {"public": "46.247.131.211", "local": "172.16.30.1/32"},
},
{
"name": "platform-variants.3mera.dev",
"item_hash": "d78e81d99e7468302bdaf82b5ca338b486629cf813384bdc3282e2b8fa7f478f",
"ipv6": "2a01:240:ad00:2502:3:d78e:81d9:9e71",
"ipv4": {"public": "46.247.131.211", "local": "172.16.30.1/32"},
},
{
"name": "platform.3mera.dev",
"item_hash": "d78e81d99e7468302bdaf82b5ca338b486629cf813384bdc3282e2b8fa7f478f",
"ipv6": "2a01:240:ad00:2502:3:d78e:81d9:9e71",
"ipv4": {"public": "46.247.131.211", "local": "172.16.30.1/32"},
},
{
"name": "praetorian.cybernetwork.me",
"item_hash": "ec18fa850f6a530a8c0e6a616b0df5def3ab3662eb6feeba8ece580780a86dc6",
"ipv6": "2a01:240:ad00:2502:3:ec18:fa85:f61",
"ipv4": {"public": "46.247.131.211", "local": "172.16.19.1/32"},
},
{
"name": "template-frontend.3mera.dev",
"item_hash": "d78e81d99e7468302bdaf82b5ca338b486629cf813384bdc3282e2b8fa7f478f",
"ipv6": "2a01:240:ad00:2502:3:d78e:81d9:9e71",
"ipv4": {"public": "46.247.131.211", "local": "172.16.30.1/32"},
},
{
"name": "test-twentysix-cloud.gerardmolina.com",
"item_hash": "31826cf53c655bd25d50f7e371242baf240d4f854372c798a37bb9eb6c562682",
"ipv6": "2a01:240:ad00:2501:3:3182:6cf5:3c61",
"ipv4": {"public": "46.255.204.201", "local": "172.16.5.1/32"},
},
]


@pytest.fixture
def mock_sample_domain_instance_list(mocker):
mocker.patch("aleph.vm.haproxy.fetch_list", mocker.AsyncMock(return_value=sample_domain_instance_list))


@pytest.mark.asyncio
async def test_fetch_list(mock_sample_domain_instance_list):
list = await haproxy.fetch_list()
assert len(list) == 9


@pytest.fixture
def mock_small_domain_list(mocker):
small_list = [
{
"name": "echo.agot.be",
"item_hash": "decadecadecadecadecadecadecadecadecadecadecadecadecadecadecadeca",
"ipv6": "2a01:240:ad00:2502:3:747b:52c7:12e1",
"ipv4": {"public": "46.247.131.211", "local": "172.16.4.1/32"},
}
]
mocker.patch("aleph.vm.haproxy.fetch_list", mocker.AsyncMock(return_value=small_list))


@pytest.mark.asyncio
async def test_update_map_file(mock_small_domain_list, tmp_path):
map_file_path = tmp_path / "backend.map"
instance_list = await haproxy.fetch_list()
assert instance_list

haproxy.update_mapfile(instance_list, str(map_file_path), 22)
content = map_file_path.read_text()
assert content == "echo.agot.be 172.16.4.2:22\n"


@pytest.fixture
def mock_socket_command(mocker):
commands = []
existing_servers: list[str] = []

def mock_response(socket_path, command): # noqa: ARG001
commands.append(command)
if "show servers state" in command:
return "1\n# be_id be_name srv_id srv_name srv_addr srv_op_state\n" + "\n".join(mock.existing_servers)
elif "disable server" in command:
return ""
elif "set server" in command:
return ""
elif "enable server" in command:
return ""
return ""

mock = mocker.patch("aleph.vm.haproxy.send_socket_command", mock_response)
mock.existing_servers = existing_servers
mock.commands = commands
return mock


@pytest.mark.asyncio
async def test_update_backend_add_server(mock_socket_command, tmp_path):
map_file_path = tmp_path / "backend.map"
map_file_path.write_text("echo.agot.be 172.16.4.2:22\n")
socket_path = "fakyfake"

# Run test
haproxy.update_haproxy_backends(socket_path, "test_backend", map_file_path, weight=1)

# Verify commands
assert mock_socket_command.commands == [
"show servers state test_backend",
# "disable server test_backend echo.agot.be",
"add server test_backend/echo.agot.be 172.16.4.2:22 weight 1 maxconn 30",
# "set server test_backend echo.agot.be addr 172.16.4.2 port 22",
# "set server test_backend echo.agot.be weight 1",
"enable server test_backend/echo.agot.be",
]


@pytest.mark.asyncio
def test_update_backend_add_server_remove_server(mock_socket_command, tmp_path):
map_file_path = tmp_path / "backend.map"
map_file_path.write_text("echo.agot.be 172.16.4.2:22\n")
socket_path = "fakyfake"

mock_socket_command.existing_servers = [
"8 test_backend 1 existing_bk 127.0.0.1 2 0 1 1 683294 1 0 2 0 0 0 0 - 4020 - 0 0 - - 0"
]
haproxy.update_haproxy_backends(socket_path, "test_backend", map_file_path, weight=1)

# Verify commands
assert mock_socket_command.commands == [
"show servers state test_backend",
"add server test_backend/echo.agot.be 172.16.4.2:22 weight 1 maxconn 30",
"enable server test_backend/echo.agot.be",
"set server test_backend/existing_bk state maint",
"del server test_backend/existing_bk",
]


@pytest.mark.asyncio
def test_update_backend_do_no_remove_fallback(mock_socket_command, tmp_path):
map_file_path = tmp_path / "backend.map"
map_file_path.write_text("echo.agot.be 172.16.4.2:22\n")
socket_path = "fakyfake"

mock_socket_command.existing_servers = [
"8 test_backend 1 fallback_local 127.0.0.1 2 0 1 1 683294 1 0 2 0 0 0 0 - 4020 - 0 0 - - 0"
]
haproxy.update_haproxy_backends(socket_path, "test_backend", map_file_path, weight=1)

# Verify commands
assert mock_socket_command.commands == [
"show servers state test_backend",
"add server test_backend/echo.agot.be 172.16.4.2:22 weight 1 maxconn 30",
"enable server test_backend/echo.agot.be",
]
Loading