Skip to content

Commit 2b30a6a

Browse files
authored
Implement Start Confidential endpoint (#627)
* Problem: The server don't have a directory to save the platform certificates generated by sevctl. Solution: Set that directory field on settings class and ensure to create the folder on initialization step. * Problem: The aren't an endpoint to be able to get the confidential platform certificates to start the VM key exchange. Solution: Create that endpoint and return the platform certificates generated by the `sevctl` command. * Fix: Solved code quality issues. * Fix: Added 2 test cases for that endpoint. * Fix: Added PR suggestions. * Fix: Modified test mock to let the tests work * Problem: Now isn't possible as a VM operator to get the client session certificates to initialize a confidential VM. Solution: Create an operator start endpoint that receive the confidential session files and starts the qemu VM to continue with the certificate exchange methods. * Fix: Remove useless aiofiles import * Fix: Solve test issues after code quality fixes * Fix: Solve code quality issues. * Fix: Solve code quality issues. * Fix: Write file in sync mode to avoid adding a new dependency. Files to write should be so small, so any blocking issue should be here. * Fix: Solved PR comments and wrong conditionals. * Fix: Solved more PR comments. * Fix: Removed unexisting import * Fix: Added useless command requested on the PR review. * Fix: Changed endpoint path and added automatic tests for that endpoint. * Fix: Solved settings singleton issue with testing, adding an `initialize_settings` method. * Fix: Just disable the setting that is failing and remove previous method to initialize the singleton.
1 parent a4e26a8 commit 2b30a6a

File tree

7 files changed

+199
-3
lines changed

7 files changed

+199
-3
lines changed

src/aleph/vm/conf.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,8 @@ class Settings(BaseSettings):
272272
description="Confidential Computing default directory. Default to EXECUTION_ROOT/confidential",
273273
)
274274

275+
CONFIDENTIAL_SESSION_DIRECTORY: Path = Field(None, description="Default to EXECUTION_ROOT/sessions")
276+
275277
# Tests on programs
276278

277279
FAKE_DATA_PROGRAM: Optional[Path] = None
@@ -416,6 +418,7 @@ def setup(self):
416418
os.makedirs(self.EXECUTION_LOG_DIRECTORY, exist_ok=True)
417419
os.makedirs(self.PERSISTENT_VOLUMES_DIR, exist_ok=True)
418420
os.makedirs(self.CONFIDENTIAL_DIRECTORY, exist_ok=True)
421+
os.makedirs(self.CONFIDENTIAL_SESSION_DIRECTORY, exist_ok=True)
419422

420423
self.API_SERVER = self.API_SERVER.rstrip("/")
421424

@@ -464,6 +467,8 @@ def __init__(
464467
self.RUNTIME_CACHE = self.CACHE_ROOT / "runtime"
465468
if not self.DATA_CACHE:
466469
self.DATA_CACHE = self.CACHE_ROOT / "data"
470+
if not self.CONFIDENTIAL_DIRECTORY:
471+
self.CONFIDENTIAL_DIRECTORY = self.CACHE_ROOT / "confidential"
467472
if not self.JAILER_BASE_DIRECTORY:
468473
self.JAILER_BASE_DIRECTORY = self.EXECUTION_ROOT / "jailer"
469474
if not self.PERSISTENT_VOLUMES_DIR:
@@ -474,8 +479,8 @@ def __init__(
474479
self.EXECUTION_LOG_DIRECTORY = self.EXECUTION_ROOT / "executions"
475480
if not self.JAILER_BASE_DIR:
476481
self.JAILER_BASE_DIR = self.EXECUTION_ROOT / "jailer"
477-
if not self.CONFIDENTIAL_DIRECTORY:
478-
self.CONFIDENTIAL_DIRECTORY = self.CACHE_ROOT / "confidential"
482+
if not self.CONFIDENTIAL_SESSION_DIRECTORY:
483+
self.CONFIDENTIAL_SESSION_DIRECTORY = self.EXECUTION_ROOT / "sessions"
479484

480485
class Config:
481486
env_prefix = "ALEPH_VM_"

src/aleph/vm/models.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,10 @@ def is_program(self) -> bool:
104104
def is_instance(self) -> bool:
105105
return isinstance(self.message, InstanceContent)
106106

107+
@property
108+
def is_confidential(self) -> bool:
109+
return self.uses_payment_stream # TODO: check also if the VM message is confidential
110+
107111
@property
108112
def hypervisor(self) -> HypervisorType:
109113
if self.is_program:

src/aleph/vm/orchestrator/supervisor.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
update_allocations,
4848
)
4949
from .views.operator import (
50+
operate_confidential_initialize,
5051
operate_erase,
5152
operate_expire,
5253
operate_reboot,
@@ -102,6 +103,7 @@ def setup_webapp():
102103
web.post("/control/allocation/notify", notify_allocation),
103104
web.get("/control/machine/{ref}/logs", stream_logs),
104105
web.post("/control/machine/{ref}/expire", operate_expire),
106+
web.post("/control/machine/{ref}/confidential/initialize", operate_confidential_initialize),
105107
web.post("/control/machine/{ref}/stop", operate_stop),
106108
web.post("/control/machine/{ref}/erase", operate_erase),
107109
web.post("/control/machine/{ref}/reboot", operate_reboot),

src/aleph/vm/orchestrator/views/operator.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from aleph_message.models import ItemHash
99
from aleph_message.models.execution import BaseExecutableContent
1010

11+
from aleph.vm.conf import settings
1112
from aleph.vm.models import VmExecution
1213
from aleph.vm.orchestrator.run import create_vm_execution
1314
from aleph.vm.orchestrator.views import authenticate_api_request
@@ -133,6 +134,50 @@ async def operate_expire(request: web.Request, authenticated_sender: str) -> web
133134
return web.Response(status=200, body=f"Expiring VM with ref {vm_hash} in {timeout} seconds")
134135

135136

137+
@cors_allow_all
138+
@require_jwk_authentication
139+
async def operate_confidential_initialize(request: web.Request, authenticated_sender: str) -> web.Response:
140+
"""Start the confidential virtual machine if possible."""
141+
# TODO: Add user authentication
142+
vm_hash = get_itemhash_or_400(request.match_info)
143+
144+
pool: VmPool = request.app["vm_pool"]
145+
logger.debug(f"Iterating through running executions... {pool.executions}")
146+
execution = get_execution_or_404(vm_hash, pool=pool)
147+
148+
if not is_sender_authorized(authenticated_sender, execution.message):
149+
return web.Response(status=403, body="Unauthorized sender")
150+
151+
if execution.is_running:
152+
return web.Response(status=403, body=f"VM with ref {vm_hash} already running")
153+
154+
if not execution.is_confidential:
155+
return web.Response(status=403, body=f"Operation not allowed for VM {vm_hash} because it isn't confidential")
156+
157+
post = await request.post()
158+
159+
vm_session_path = settings.CONFIDENTIAL_SESSION_DIRECTORY / vm_hash
160+
vm_session_path.mkdir(exist_ok=True)
161+
162+
session_file_content = post.get("session")
163+
if not session_file_content:
164+
return web.Response(status=403, body=f"Session file required for VM with ref {vm_hash}")
165+
166+
session_file_path = vm_session_path / "vm_session.b64"
167+
session_file_path.write_bytes(session_file_content.file.read())
168+
169+
godh_file_content = post.get("godh")
170+
if not godh_file_content:
171+
return web.Response(status=403, body=f"GODH file required for VM with ref {vm_hash}")
172+
173+
godh_file_path = vm_session_path / "vm_godh.b64"
174+
godh_file_path.write_bytes(godh_file_content.file.read())
175+
176+
pool.systemd_manager.enable_and_start(execution.controller_service)
177+
178+
return web.Response(status=200, body=f"Started VM with ref {vm_hash}")
179+
180+
136181
@cors_allow_all
137182
@require_jwk_authentication
138183
async def operate_stop(request: web.Request, authenticated_sender: str) -> web.Response:

src/aleph/vm/pool.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,9 @@ async def create_a_vm(
123123
await execution.start()
124124

125125
# Start VM and snapshots automatically
126-
if execution.persistent:
126+
# If the execution is confidential, don't start it because we need to wait for the session certificate
127+
# files, use the endpoint /control/machine/{ref}/start to get session files and start the VM
128+
if execution.persistent and not execution.is_confidential:
127129
self.systemd_manager.enable_and_start(execution.controller_service)
128130
await execution.wait_for_init()
129131
if execution.is_program and execution.vm:

tests/supervisor/test_operator.py

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
import io
2+
import tempfile
3+
from pathlib import Path
4+
from unittest import mock
5+
from unittest.mock import MagicMock
6+
7+
import aiohttp
8+
import pytest
9+
from aleph_message.models import ItemHash
10+
11+
from aleph.vm.conf import settings
12+
from aleph.vm.orchestrator.supervisor import setup_webapp
13+
from aleph.vm.storage import get_message
14+
15+
16+
@pytest.mark.asyncio
17+
async def test_operator_confidential_initialize_not_authorized(aiohttp_client):
18+
"""Test that the confidential initialize endpoint rejects if the sender is not the good one. Auth needed"""
19+
20+
settings.ENABLE_QEMU_SUPPORT = True
21+
settings.ENABLE_CONFIDENTIAL_COMPUTING = True
22+
settings.setup()
23+
24+
class FakeExecution:
25+
message = None
26+
is_running: bool = True
27+
is_confidential: bool = False
28+
29+
class FakeVmPool:
30+
executions: dict[ItemHash, FakeExecution] = {}
31+
32+
def __init__(self):
33+
self.executions[settings.FAKE_INSTANCE_ID] = FakeExecution()
34+
35+
with mock.patch(
36+
"aleph.vm.orchestrator.views.authentication.authenticate_jwk",
37+
return_value="",
38+
):
39+
with mock.patch(
40+
"aleph.vm.orchestrator.views.operator.is_sender_authorized",
41+
return_value=False,
42+
) as is_sender_authorized_mock:
43+
app = setup_webapp()
44+
app["vm_pool"] = FakeVmPool()
45+
client = await aiohttp_client(app)
46+
response = await client.post(
47+
f"/control/machine/{settings.FAKE_INSTANCE_ID}/confidential/initialize",
48+
)
49+
assert response.status == 403
50+
assert await response.text() == "Unauthorized sender"
51+
is_sender_authorized_mock.assert_called_once()
52+
53+
54+
@pytest.mark.asyncio
55+
async def test_operator_confidential_initialize_already_running(aiohttp_client):
56+
"""Test that the confidential initialize endpoint rejects if the VM is already running. Auth needed"""
57+
58+
settings.ENABLE_QEMU_SUPPORT = True
59+
settings.ENABLE_CONFIDENTIAL_COMPUTING = True
60+
settings.setup()
61+
62+
vm_hash = ItemHash(settings.FAKE_INSTANCE_ID)
63+
instance_message = await get_message(ref=vm_hash)
64+
65+
class FakeExecution:
66+
message = instance_message.content
67+
is_running: bool = True
68+
is_confidential: bool = False
69+
70+
class FakeVmPool:
71+
executions: dict[ItemHash, FakeExecution] = {}
72+
73+
def __init__(self):
74+
self.executions[vm_hash] = FakeExecution()
75+
76+
with mock.patch(
77+
"aleph.vm.orchestrator.views.authentication.authenticate_jwk",
78+
return_value=instance_message.sender,
79+
):
80+
app = setup_webapp()
81+
app["vm_pool"] = FakeVmPool()
82+
client = await aiohttp_client(app)
83+
response = await client.post(
84+
f"/control/machine/{vm_hash}/confidential/initialize",
85+
json={"persistent_vms": []},
86+
)
87+
assert response.status == 403
88+
assert await response.text() == f"VM with ref {vm_hash} already running"
89+
90+
91+
@pytest.mark.asyncio
92+
async def test_operator_confidential_initialize(aiohttp_client):
93+
"""Test that the certificates system endpoint responds. No auth needed"""
94+
95+
settings.ENABLE_QEMU_SUPPORT = True
96+
settings.ENABLE_CONFIDENTIAL_COMPUTING = True
97+
settings.setup()
98+
99+
vm_hash = ItemHash(settings.FAKE_INSTANCE_ID)
100+
instance_message = await get_message(ref=vm_hash)
101+
102+
class FakeExecution:
103+
message = instance_message.content
104+
is_running: bool = False
105+
is_confidential: bool = True
106+
controller_service: str = ""
107+
108+
class MockSystemDManager:
109+
enable_and_start = MagicMock(return_value=True)
110+
111+
class FakeVmPool:
112+
executions: dict[ItemHash, FakeExecution] = {}
113+
114+
def __init__(self):
115+
self.executions[vm_hash] = FakeExecution()
116+
self.systemd_manager = MockSystemDManager()
117+
118+
with tempfile.NamedTemporaryFile() as temp_file:
119+
form_data = aiohttp.FormData()
120+
form_data.add_field("session", open(temp_file.name, "rb"), filename="session.b64")
121+
form_data.add_field("godh", open(temp_file.name, "rb"), filename="godh.b64")
122+
123+
with mock.patch(
124+
"aleph.vm.orchestrator.views.authentication.authenticate_jwk",
125+
return_value=instance_message.sender,
126+
):
127+
app = setup_webapp()
128+
app["vm_pool"] = FakeVmPool()
129+
client = await aiohttp_client(app)
130+
response = await client.post(
131+
f"/control/machine/{vm_hash}/confidential/initialize",
132+
data=form_data,
133+
)
134+
assert response.status == 200
135+
assert await response.text() == f"Started VM with ref {vm_hash}"
136+
app["vm_pool"].systemd_manager.enable_and_start.assert_called_once()

tests/supervisor/test_qemu_instance.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ async def test_create_qemu_instance():
5353
settings.USE_FAKE_INSTANCE_BASE = True
5454
settings.FAKE_INSTANCE_MESSAGE = settings.FAKE_INSTANCE_QEMU_MESSAGE
5555
settings.FAKE_INSTANCE_BASE = settings.FAKE_QEMU_INSTANCE_BASE
56+
settings.ENABLE_CONFIDENTIAL_COMPUTING = False
5657
settings.ALLOW_VM_NETWORKING = False
5758
settings.USE_JAILER = False
5859

@@ -108,6 +109,7 @@ async def test_create_qemu_instance_online():
108109
settings.USE_FAKE_INSTANCE_BASE = True
109110
settings.FAKE_INSTANCE_MESSAGE = settings.FAKE_INSTANCE_QEMU_MESSAGE
110111
settings.FAKE_INSTANCE_BASE = settings.FAKE_QEMU_INSTANCE_BASE
112+
settings.ENABLE_CONFIDENTIAL_COMPUTING = False
111113
settings.ALLOW_VM_NETWORKING = True
112114
settings.USE_JAILER = False
113115

0 commit comments

Comments
 (0)