diff --git a/build_sdk.py b/build_sdk.py index f9c9fffc..9dc72d55 100644 --- a/build_sdk.py +++ b/build_sdk.py @@ -63,6 +63,7 @@ class ConfigInfo: "KernelPlatform": "tqma8xqp1gb", "KernelIsMCS": True, "KernelArmExportPCNTUser": True, + "KernelArmHypervisorSupport": True, }, examples={ "ethernet": Path("example/tqma8xqp1gb/ethernet") @@ -77,6 +78,7 @@ class ConfigInfo: "KernelARMPlatform": "zcu102", "KernelIsMCS": True, "KernelArmExportPCNTUser": True, + "KernelArmHypervisorSupport": True, }, examples={ "hello": Path("example/zcu102/hello") @@ -90,6 +92,7 @@ class ConfigInfo: "KernelPlatform": "maaxboard", "KernelIsMCS": True, "KernelArmExportPCNTUser": True, + "KernelArmHypervisorSupport": True, }, examples={ "hello": Path("example/maaxboard/hello") @@ -103,6 +106,7 @@ class ConfigInfo: "KernelPlatform": "imx8mm-evk", "KernelIsMCS": True, "KernelArmExportPCNTUser": True, + "KernelArmHypervisorSupport": True, }, examples={ "passive_server": Path("example/imx8mm_evk/passive_server") @@ -116,6 +120,7 @@ class ConfigInfo: "KernelPlatform": "imx8mq-evk", "KernelIsMCS": True, "KernelArmExportPCNTUser": True, + "KernelArmHypervisorSupport": True, }, examples={ "hello": Path("example/imx8mq_evk/hello") @@ -129,6 +134,7 @@ class ConfigInfo: "KernelPlatform": "odroidc2", "KernelIsMCS": True, "KernelArmExportPCNTUser": True, + "KernelArmHypervisorSupport": True, }, examples={ "hello": Path("example/odroidc2/hello") @@ -142,6 +148,7 @@ class ConfigInfo: "KernelPlatform": "odroidc4", "KernelIsMCS": True, "KernelArmExportPCNTUser": True, + "KernelArmHypervisorSupport": True, }, examples={ "timer": Path("example/odroidc4/timer") @@ -156,6 +163,7 @@ class ConfigInfo: "KernelIsMCS": True, "KernelArmExportPCNTUser": True, "QEMU_MEMORY": "2048", + "KernelArmHypervisorSupport": True, }, examples={ "hello": Path("example/qemu_virt_aarch64/hello"), @@ -495,7 +503,8 @@ def main() -> None: for config in selected_configs: build_sel4(sel4_dir, root_dir, build_dir, board, config) loader_defines = [ - ("LINK_ADDRESS", hex(board.loader_link_address)) + ("LINK_ADDRESS", hex(board.loader_link_address)), + ("PHYSICAL_ADDRESS_BITS", 40) ] build_elf_component("loader", root_dir, build_dir, board, config, loader_defines) build_elf_component("monitor", root_dir, build_dir, board, config, []) diff --git a/dev_build.py b/dev_build.py index 869cef15..d6faa684 100644 --- a/dev_build.py +++ b/dev_build.py @@ -6,7 +6,7 @@ This is designed to make it easy to build and run examples during development. """ from argparse import ArgumentParser -from os import environ +from os import environ, system from pathlib import Path from shutil import rmtree from subprocess import run @@ -77,6 +77,10 @@ def main(): if not BUILD_DIR.exists(): BUILD_DIR.mkdir() + tool_rebuild = f"cd tool/microkit && cargo build && cp target/debug/microkit {release}/bin/microkit" + r = system(tool_rebuild) + assert r == 0 + make_env = environ.copy() make_env["BUILD_DIR"] = str(BUILD_DIR.absolute()) make_env["MICROKIT_BOARD"] = args.board diff --git a/docs/manual.md b/docs/manual.md index 3306b6e9..00babf79 100644 --- a/docs/manual.md +++ b/docs/manual.md @@ -95,10 +95,11 @@ This document attempts to clearly describe all of these terms, however as the co * [system](#system) * [protection domain (PD)](#pd) -* [channel](#channel) +* [virtual machine (VM)](#vm) * [memory region](#mr) -* [notification](#notification) +* [channel](#channel) * [protected procedure](#pp) +* [notification](#notification) * [interrupt](#irq) * [fault](#fault) @@ -172,6 +173,25 @@ Runnable PDs of the same priority are scheduled in a round-robin manner. The **passive** determines whether the PD is passive. A passive PD will have its scheduling context revoked after initialisation and then bound instead to the PD's notification object. This means the PD will be scheduled on receiving a notification, whereby it will run on the notification's scheduling context. When the PD receives a *protected procedure* by another PD or a *fault* caused by a child PD, the passive PD will run on the scheduling context of the callee. +## Virtual Machine {#vm} + +A *virtual machine* (VM) is a runtime abstraction for running guest operating systems in Microkit. It is similar +to a protection domain in that it provides a thread of control that executes within an isolated virtual address space. + +The main difference between a VM and a PD is that VMs have a higher privilege level such that they may function as a +guest operations and have their own user-space programs at a separate exception level. + +The virtual machine is always a child of a PD. Exceptions caused by the virtual machine are delivered to the parent PD +through the `fault` entry point. Each virtual machine has a 'virtual CPU' associated with it which is used to identify +the fault. At the moment, all VMs only have a single virtual CPU but in the future multi-vCPU VMs will be allowed. + +The parent PD is responsible for starting and managing the virtual machine. Microkit provides the abstractions in +order to manage the virtual machine through seL4 but there is typically a non-trivial amount +of supporting code/infrastructure to properly start and manage a VM. + +To keep the (potentially untrusted) virtual machine isolated from the rest of the system, Microkit enforces that a +protection domain can only ever manage a single virtual machine. + ## Memory Regions {#mr} A *memory region* is a contiguous range of physical memory. @@ -283,12 +303,10 @@ a designated 'fault handler'. By default, all faults caused by protection domain details about the fault in a debug configuration. When a protection domain is a child of another protection domain, the designated fault handler for the child is the parent -protection domain. +protection domain. The same applies for a virtual machine. -This means that whenever a fault is caused by a child PD, it will be delivered to the parent PD instead of the system fault -handler via the `fault` entry point. It is then up to the parent to decide how the fault is handled. The label of the given -`msginfo` can be used to determine what kind of fault occurred. You can find more information about decoding the fault in the -'Faults' section of the seL4 manual. +This means that whenever a fault is caused by a child, it will be delivered to the parent PD instead of the system fault +handler via the `fault` entry point. It is then up to the parent to decide how the fault is handled. # SDK {#sdk} @@ -321,9 +339,10 @@ The ELF files provided as program images should be standard ELF files and have b ## System Requirements -The Microkit tool requires Linux x86_64, macOS x86_64 or macOS AArch64. +The Microkit tool requires Linux (x86-64), macOS (x86-64 or AArch64). On Linux, the Microkit tool is statically linked and should run on any distribution. + On macOS, the Microkit tool should run on macOS 10.12 (Sierra) or higher. The Microkit tool does not depend on any additional system binaries. @@ -372,9 +391,10 @@ If the protection domain provides a protected procedure it must also implement: microkit_msginfo protected(microkit_channel ch, microkit_msginfo msginfo); -If the protection domain has child protection domains it must also implement: +If the protection domain has children it must also implement: - void fault(microkit_pd pd, microkit_msginfo msginfo); + seL4_Bool fault(microkit_child child, microkit_msginfo msginfo, + microkit_msginfo *reply_msginfo); `libmicrokit` provides the following functions: @@ -384,10 +404,18 @@ If the protection domain has child protection domains it must also implement: seL4_Word microkit_msginfo_get_label(microkit_msginfo msginfo); seL4_Word microkit_msginfo_get_count(microkit_msginfo msginfo); void microkit_irq_ack(microkit_channel ch); - void microkit_pd_restart(microkit_pd pd, seL4_Word entry_point); - void microkit_pd_stop(microkit_pd pd); + void microkit_pd_restart(microkit_child pd, seL4_Word entry_point); + void microkit_pd_stop(microkit_child pd); void microkit_mr_set(seL4_Uint8 mr, seL4_Word value); seL4_Word microkit_mr_get(seL4_Uint8 mr); + void microkit_vcpu_restart(microkit_child vcpu, seL4_Word entry_point); + void microkit_vcpu_stop(microkit_child vcpu); + void microkit_vcpu_arm_inject_irq(microkit_child vcpu, seL4_Uint16 irq, + seL4_Uint8 priority, seL4_Uint8 group, + seL4_Uint8 index); + void microkit_vcpu_arm_ack_vppi(microkit_child vcpu, seL4_Word irq); + seL4_Word microkit_vcpu_arm_read_reg(microkit_child vcpu, seL4_Word reg); + void microkit_vcpu_arm_write_reg(microkit_child vcpu, seL4_Word reg, seL4_Word value); ## `void init(void)` @@ -405,7 +433,7 @@ The `notified` entry point is called by the system when a PD has received a noti Channel identifiers are specified in the system configuration. -## `microkit_msginfo protected(microkit_channel ch, microkit_msginfo message)` +## `microkit_msginfo protected(microkit_channel ch, microkit_msginfo msginfo)` The `protected` entry point is optional. This is invoked when another PD calls `microkit_ppcall` on a channel shared with the PD. @@ -415,34 +443,44 @@ Indirectly this identifies the PD performing the call. Channel identifiers are specified in the system configuration. **Note:** The channel argument is passed by the system and is unforgeable. -The `message` argument is the argument passed to the PP and is provided by the calling PD. +The `msginfo` argument is the argument passed to the PP and is provided by the calling PD. The contents of the message is up to a pre-arranged protocol between the PDs. The message contents are opaque to the system. Note: The message is *copied* from the caller. -The returned `message` is the return value of the protected procedure. +The returned `microkit_msginfo` is the return value of the protected procedure. As with arguments, this is *copied* to the caller. -## `void fault(microkit_pd pd, microkit_msginfo msginfo)` +## `seL4_Bool fault(microkit_child child, microkit_msginfo msginfo, microkit_msginfo *reply_msginfo)` -The `fault` entry point depends on whether the given PD has children. -This is invoked when a child PD causes a fault. +The `fault` entry point being invoked depends on whether the given PD has children. +It is invoked when a child PD or VM causes a fault. -The `pd` argument identifies the child PD that caused the fault. +The `child` argument identifies the child that caused the fault. The `msginfo` argument is given by the seL4 kernel when a fault occurs and contains information as to what fault occurred. -You can use `microkit_msginfo_get_label` to deduce what kind of fault happened (for example, whether -it was a user exception or a virtual memory fault). The type `seL4_Fault_tag_t` defines the kinds of -faults for a given architecture that could occur. You can find the list of fault labels in the 'Faults' -section of the seL4 manual. +The `reply_msginfo` argument is given by libmicrokit and can be used to reply to the fault. + +The returned `seL4_Bool` is whether or not to reply to the fault with the message `reply_msginfo`. +Returning `seL4_True` will reply to the fault. Returning `seL4_False` will not reply to the fault. + +You can use `microkit_msginfo_get_label` on `msginfo` to deduce what kind of fault happened +(for example, whether it was a user exception or a virtual memory fault). -## `microkit_msginfo microkit_ppcall(microkit_channel channel, microkit_msginfo message)` +Whether or not you reply to the fault depends on the type of fault that has occurred and how you want +to handle it. + +To find the full list of possible faults that could occur and details regarding to replying to a particular +kind of fault, please see the 'Faults' section of the +[seL4 reference manual](https://sel4.systems/Info/Docs/seL4-manual-latest.pdf). + +## `microkit_msginfo microkit_ppcall(microkit_channel ch, microkit_msginfo msginfo)` Performs a call to a protected procedure in a different PD. The `ch` argument identifies the protected procedure to be called. -`message` is passed as argument to the protected procedure. +`msginfo` is passed as argument to the protected procedure. Channel identifiers are specified in the system configuration. The protected procedure's return data is returned in the `microkit_msginfo`. @@ -456,14 +494,14 @@ Channel identifiers are specified in the system configuration. Acknowledge the interrupt identified by the specified channel. -## `void microkit_pd_restart(microkit_pd id, uintptr_t entry_point)` +## `void microkit_pd_restart(microkit_child pd, uintptr_t entry_point)` -Restart the execution of a child protection domain with ID `id` at the given `entry_point`. +Restart the execution of a child protection domain with ID `pd` at the given `entry_point`. This will set the program counter of the child protection domain to `entry_point`. -## `void microkit_pd_stop(microkit_pd id)` +## `void microkit_pd_stop(microkit_child pd)` -Stop the execution of the child protection domain with ID `id`. +Stop the execution of the child protection domain with ID `pd`. ## `microkit_msginfo microkit_msginfo_new(uint64_t label, uint16_t count)` @@ -471,7 +509,7 @@ Creates a new message structure. The message can be passed to `microkit_ppcall` or returned from `protected`. -## `uint64_t microkit_msginfo_get_label(microkit_msginfo message)` +## `uint64_t microkit_msginfo_get_label(microkit_msginfo msginfo)` Returns the label from a message. @@ -487,6 +525,41 @@ Get a message register. Set a message register. +## `void microkit_vcpu_restart(microkit_child vcpu, seL4_Word entry_point)` + +Restart the execution of a VM's virtual CPU with ID `vcpu` at the given `entry point`. +This will set the program counter of the vCPU to `entry_point`. + +## `void microkit_vcpu_stop(microkit_child vcpu)` + +Stop the execution of the VM's virtual CPU with ID `vcpu`. + +## `void microkit_vcpu_arm_inject_irq(microkit_child vcpu, seL4_Uint16 irq, + seL4_Uint8 priority, seL4_Uint8 group, + seL4_Uint8 index)` + +Inject a virtual ARM interrupt for a virtual CPU `vcpu` with IRQ number `irq`. +The `priority` (0-31) determines what ARM GIC (generic interrupt controller) +priority level the virtual IRQ will be injected as. The `group` determines whether +the virtual IRQ will be injected into secure world (1) or non-secure world (0). +The `index` is the index of the virtual GIC list register. + +## `void microkit_vcpu_arm_ack_vppi(microkit_child vcpu, seL4_Word irq)` + +Acknowledge a ARM virtual Private Peripheral Interrupt (PPI) with IRQ number `irq` +for a VM's vCPU with ID `vcpu`. + +## `seL4_Word microkit_vcpu_arm_read_reg(microkit_child vcpu, seL4_Word reg)` + +Read a register for a given virtual CPU with ID `vcpu`. The `reg` argument is the index of the +register that is read. The list of registers is defined by the enum `seL4_VCPUReg` +in the seL4 source code. + +## `void microkit_vcpu_arm_write_reg(microkit_child vcpu, seL4_Word reg, seL4_Word value)` + +Write to a register for a given virtual CPU with ID `vcpu`. The `reg` argument is the index of the +register that is written to. The `value` argument is what the register will be set to. +The list of registers is defined by the enum `seL4_VCPUReg` in the seL4 source code. # System Description File {#sysdesc} @@ -523,6 +596,7 @@ Additionally, it supports the following child elements: * `irq`: (zero or more) Describes hardware interrupt associations. * `setvar`: (zero or more) Describes variable rewriting. * `protection_domain`: (zero or more) Describes a child protection domain. +* `virtual_machine`: (zero or one) Describes a child virtual machine. The `program_image` element has a single `path` attribute describing the path to an ELF file. @@ -539,17 +613,33 @@ The `irq` element has the following attributes: * `irq`: The hardware interrupt number. * `id`: The channel identifier. Must be at least 0 and less than 63. * `trigger`: (optional) Whether the IRQ is edge triggered ("edge") or level triggered ("level"). Defaults to "level". -\ The `setvar` element has the following attributes: * `symbol`: Name of a symbol in the ELF file. * `region_paddr`: Name of an MR. The symbol's value shall be updated to this MR's physical address. -The `protection_domain` element the same attributes as any other protection domain as well as: +The `protection_domain` element has the same attributes as any other protection domain as well as: * `id`: The ID of the child for the parent to refer to. +The `virtual_machine` element has the following attributes: + +* `name`: A unique name for the virtual machine +* `priority`: The priority of the virtual machine (integer 0 to 254). +* `budget`: (optional) The VM's budget in microseconds; defaults to 1,000. +* `period`: (optional) The VM's period in microseconds; must not be smaller than the budget; defaults to the budget. + +Additionally, it supports the following child elements: + +* `vcpu`: (exactly one) Describes the virtual CPU that will be tied to the virtual machine. At the moment only one + vCPU is supported and so only one of these elements can exist for a virtual machine. +* `map`: (zero or more) Describes mapping of memory regions into the virtual machine. + +The `vcpu` element has a single `id` attribute defining the identifier used for the virutal machine's vCPU. + +The `map` element has the same attributes as the protection domain with the exception of `setvar_vaddr`. + ## `memory_region` The `memory_region` element describes a memory region. @@ -700,7 +790,7 @@ Cortex-A53 CPU. You can use the following command to simulate a Microkit system: $ qemu-system-aarch64 \ - -machine virt \ + -machine virt,virtualization=on \ -cpu cortex-a53 \ -nographic \ -serial mon:stdio \ @@ -726,7 +816,7 @@ For simulating the ZCU102 using QEMU, use the following command: $ qemu-system-aarch64 \ -m size=4G \ - -machine xlnx-zcu102 \ + -machine xlnx-zcu102,virtualization=on \ -nographic \ -device loader,file=[SYSTEM IMAGE],addr=0x40000000,cpu-num=0 \ -serial mon:stdio diff --git a/example/qemu_virt_aarch64/hierarchy/restarter.c b/example/qemu_virt_aarch64/hierarchy/restarter.c index a7075625..20e3be45 100644 --- a/example/qemu_virt_aarch64/hierarchy/restarter.c +++ b/example/qemu_virt_aarch64/hierarchy/restarter.c @@ -42,17 +42,20 @@ seL4_MessageInfo_t protected(microkit_channel ch, microkit_msginfo msginfo) return microkit_msginfo_new(0, 0); } -void fault(microkit_pd id, microkit_msginfo msginfo) +seL4_Bool fault(microkit_child child, microkit_msginfo msginfo, microkit_msginfo *reply_msginfo) { - microkit_dbg_puts("restarter: received fault message for pd: "); - put8(id); + microkit_dbg_puts("restarter: received fault message for child pd: "); + put8(child); microkit_dbg_puts("\n"); restart_count++; if (restart_count < 10) { - microkit_pd_restart(id, 0x200000); + microkit_pd_restart(child, 0x200000); microkit_dbg_puts("restarter: restarted\n"); } else { - microkit_pd_stop(id); + microkit_pd_stop(child); microkit_dbg_puts("restarter: too many restarts - PD stopped\n"); } + + /* We explicitly restart the thread so we do not need to 'reply' to the fault. */ + return seL4_False; } diff --git a/libmicrokit/include/microkit.h b/libmicrokit/include/microkit.h index 8ebbdc05..66b86d2d 100644 --- a/libmicrokit/include/microkit.h +++ b/libmicrokit/include/microkit.h @@ -12,7 +12,7 @@ #include typedef unsigned int microkit_channel; -typedef unsigned int microkit_pd; +typedef unsigned int microkit_child; typedef seL4_MessageInfo_t microkit_msginfo; #define MONITOR_EP 5 @@ -20,6 +20,8 @@ typedef seL4_MessageInfo_t microkit_msginfo; #define BASE_ENDPOINT_CAP 74 #define BASE_IRQ_CAP 138 #define BASE_TCB_CAP 202 +#define BASE_VM_TCB_CAP 266 +#define BASE_VCPU_CAP 330 #define MICROKIT_MAX_CHANNELS 62 @@ -27,7 +29,7 @@ typedef seL4_MessageInfo_t microkit_msginfo; void init(void); void notified(microkit_channel ch); microkit_msginfo protected(microkit_channel ch, microkit_msginfo msginfo); -void fault(microkit_pd pd, microkit_msginfo msginfo); +seL4_Bool fault(microkit_child child, microkit_msginfo msginfo, microkit_msginfo *reply_msginfo); extern char microkit_name[16]; /* These next three variables are so our PDs can combine a signal with the next Recv syscall */ @@ -68,7 +70,7 @@ static inline void microkit_irq_ack(microkit_channel ch) seL4_IRQHandler_Ack(BASE_IRQ_CAP + ch); } -static inline void microkit_pd_restart(microkit_pd pd, seL4_Word entry_point) +static inline void microkit_pd_restart(microkit_child pd, seL4_Word entry_point) { seL4_Error err; seL4_UserContext ctxt = {0}; @@ -87,7 +89,7 @@ static inline void microkit_pd_restart(microkit_pd pd, seL4_Word entry_point) } } -static inline void microkit_pd_stop(microkit_pd pd) +static inline void microkit_pd_stop(microkit_child pd) { seL4_Error err; err = seL4_TCB_Suspend(BASE_TCB_CAP + pd); @@ -126,3 +128,78 @@ static seL4_Word microkit_mr_get(seL4_Uint8 mr) { return seL4_GetMR(mr); } + +/* The following APIs are only available where the kernel is built as a hypervisor. */ +#if defined(CONFIG_ARM_HYPERVISOR_SUPPORT) +static inline void microkit_vcpu_restart(microkit_child vcpu, seL4_Word entry_point) +{ + seL4_Error err; + seL4_UserContext ctxt = {0}; + ctxt.pc = entry_point; + err = seL4_TCB_WriteRegisters( + BASE_VM_TCB_CAP + vcpu, + seL4_True, + 0, /* No flags */ + 1, /* writing 1 register */ + &ctxt + ); + + if (err != seL4_NoError) { + microkit_dbg_puts("microkit_vm_restart: error writing registers\n"); + microkit_internal_crash(err); + } +} + +static inline void microkit_vcpu_stop(microkit_child vcpu) +{ + seL4_Error err; + err = seL4_TCB_Suspend(BASE_VM_TCB_CAP + vcpu); + if (err != seL4_NoError) { + microkit_dbg_puts("microkit_vm_stop: error suspending TCB\n"); + microkit_internal_crash(err); + } +} + +static inline void microkit_vcpu_arm_inject_irq(microkit_child vcpu, seL4_Uint16 irq, seL4_Uint8 priority, + seL4_Uint8 group, seL4_Uint8 index) +{ + seL4_Error err; + err = seL4_ARM_VCPU_InjectIRQ(BASE_VCPU_CAP + vcpu, irq, priority, group, index); + if (err != seL4_NoError) { + microkit_dbg_puts("microkit_arm_vcpu_inject_irq: error injecting IRQ\n"); + microkit_internal_crash(err); + } +} + +static inline void microkit_vcpu_arm_ack_vppi(microkit_child vcpu, seL4_Word irq) +{ + seL4_Error err; + err = seL4_ARM_VCPU_AckVPPI(BASE_VCPU_CAP + vcpu, irq); + if (err != seL4_NoError) { + microkit_dbg_puts("microkit_arm_vcpu_ack_vppi: error acking VPPI\n"); + microkit_internal_crash(err); + } +} + +static inline seL4_Word microkit_vcpu_arm_read_reg(microkit_child vcpu, seL4_Word reg) +{ + seL4_ARM_VCPU_ReadRegs_t ret; + ret = seL4_ARM_VCPU_ReadRegs(BASE_VCPU_CAP + vcpu, reg); + if (ret.error != seL4_NoError) { + microkit_dbg_puts("microkit_arm_vcpu_read_reg: error reading vCPU register\n"); + microkit_internal_crash(ret.error); + } + + return ret.value; +} + +static inline void microkit_vcpu_arm_write_reg(microkit_child vcpu, seL4_Word reg, seL4_Word value) +{ + seL4_Error err; + err = seL4_ARM_VCPU_WriteRegs(BASE_VCPU_CAP + vcpu, reg, value); + if (err != seL4_NoError) { + microkit_dbg_puts("microkit_arm_vcpu_write_reg: error writing vCPU register\n"); + microkit_internal_crash(err); + } +} +#endif diff --git a/libmicrokit/src/main.c b/libmicrokit/src/main.c index d9990943..5d005db9 100644 --- a/libmicrokit/src/main.c +++ b/libmicrokit/src/main.c @@ -42,8 +42,9 @@ __attribute__((weak)) microkit_msginfo protected(microkit_channel ch, microkit_m return seL4_MessageInfo_new(0, 0, 0, 0); } -__attribute__((weak)) void fault(microkit_pd pd, microkit_msginfo msginfo) +__attribute__((weak)) seL4_Bool fault(microkit_child child, microkit_msginfo msginfo, microkit_msginfo *reply_msginfo) { + return seL4_False; } static void run_init_funcs(void) @@ -78,7 +79,10 @@ static void handler_loop(void) have_reply = false; if (is_fault) { - fault(badge & PD_MASK, tag); + seL4_Bool reply_to_fault = fault(badge & PD_MASK, tag, &reply_tag); + if (reply_to_fault) { + have_reply = true; + } } else if (is_endpoint) { have_reply = true; reply_tag = protected(badge & CHANNEL_MASK, tag); diff --git a/loader/Makefile b/loader/Makefile index d145d164..91917b7e 100644 --- a/loader/Makefile +++ b/loader/Makefile @@ -19,6 +19,9 @@ ifeq ($(strip $(LINK_ADDRESS)),) $(error LINK_ADDRESS must be specified) endif +ifeq ($(strip $(PHYSICAL_ADDRESS_BITS)),) +$(error PHYSICAL_ADDRESS_BITS must be specified) +endif TOOLCHAIN := aarch64-none-elf- CFLAGS := -std=gnu11 -g -O3 -nostdlib -ffreestanding -mcpu=$(GCC_CPU) -DBOARD_$(BOARD) -Wall -Werror -mgeneral-regs-only @@ -29,7 +32,7 @@ LINKSCRIPT_INPUT := loader.ld LINKSCRIPT := $(BUILD_DIR)/link.ld $(BUILD_DIR)/%.o : src/%.S - $(TOOLCHAIN)gcc -x assembler-with-cpp -c -g -mcpu=$(GCC_CPU) $< -o $@ + $(TOOLCHAIN)gcc -x assembler-with-cpp -c -g -DPHYSICAL_ADDRESS_BITS=$(PHYSICAL_ADDRESS_BITS) -mcpu=$(GCC_CPU) $< -o $@ $(BUILD_DIR)/%.o : src/%.s $(TOOLCHAIN)as -g -mcpu=$(GCC_CPU) $< -o $@ diff --git a/loader/src/loader.c b/loader/src/loader.c index 6efca8f0..75417c27 100644 --- a/loader/src/loader.c +++ b/loader/src/loader.c @@ -84,6 +84,7 @@ typedef void (*sel4_entry)( void switch_to_el1(void); void switch_to_el2(void); void el1_mmu_enable(void); +void el2_mmu_enable(void); char _stack[STACK_SIZE] ALIGN(16); @@ -479,6 +480,10 @@ static int ensure_correct_el(void) if (loader_data->flags & FLAG_SEL4_HYP) { if (el != EL2) { puts("LDR|ERROR: seL4 configured as a hypervisor, but not in EL2\n"); + return 1; + } else { + puts("LDR|INFO: Resetting CNTVOFF\n"); + asm volatile("msr cntvoff_el2, xzr"); } } else { if (el == EL2) { @@ -566,6 +571,7 @@ static void configure_gicv2(void) int main(void) { int r; + enum el el; #if defined(BOARD_zcu102) uart_init(); @@ -595,7 +601,14 @@ int main(void) } puts("LDR|INFO: enabling MMU\n"); - el1_mmu_enable(); + el = current_el(); + if (el == EL1) { + el1_mmu_enable(); + } else if (el == EL2) { + el2_mmu_enable(); + } else { + puts("LDR|ERROR: unknown EL level for MMU enable\n"); + } puts("LDR|INFO: jumping to kernel\n"); start_kernel(); diff --git a/loader/src/util64.S b/loader/src/util64.S index c27e532b..c1d97565 100644 --- a/loader/src/util64.S +++ b/loader/src/util64.S @@ -60,6 +60,20 @@ #define TCR_PS_16T (4 << 16) #define TCR_PS_256T (5 << 16) +#ifndef PHYSICAL_ADDRESS_BITS +#error "Missing PHYSICAL_ADDRESS_BITS" +#endif + +#if PHYSICAL_ADDRESS_BITS == 40 +#define TCR_PS TCR_PS_1T +#elif PHYSICAL_ADDRESS_BITS == 44 +#define TCR_PS TCR_PS_16T +#else +#error "Unsupported PHYSICAL_ADDRESS_BITS value" +#endif + +#define TCR_EL2_RES1 ((1 << 23) | (1 << 31)) + #define TCR_ASID16 (1 << 36) /* Assembler Macros */ @@ -326,6 +340,65 @@ BEGIN_FUNC(el1_mmu_enable) END_FUNC(el1_mmu_enable) +BEGIN_FUNC(el2_mmu_enable) + stp x29, x30, [sp, #-16]! + mov x29, sp + + /* Disable caches */ + bl flush_dcache + + /* Ensure I-cache, D-cache and mmu are disabled for EL2/Stage1 */ + disable_mmu sctlr_el2, x8 + + /* + * Invalidate the local I-cache so that any instructions fetched + * speculatively are discarded. + */ + bl invalidate_icache + + /* + * DEVICE_nGnRnE 000 00000000 + * DEVICE_nGnRE 001 00000100 + * DEVICE_GRE 010 00001100 + * NORMAL_NC 011 01000100 + * NORMAL 100 11111111 + */ + ldr x5, =MAIR(0x00, MT_DEVICE_nGnRnE) | \ + MAIR(0x04, MT_DEVICE_nGnRE) | \ + MAIR(0x0c, MT_DEVICE_GRE) | \ + MAIR(0x44, MT_NORMAL_NC) | \ + MAIR(0xff, MT_NORMAL) + msr mair_el2, x5 + + ldr x8, =TCR_T0SZ(48) | TCR_IRGN0_WBWC | TCR_ORGN0_WBWC | TCR_SH0_ISH | TCR_TG0_4K | TCR_PS | TCR_EL2_RES1 + msr tcr_el2, x8 + isb + + /* Setup page tables */ + adrp x8, boot_lvl0_lower + msr ttbr0_el2, x8 + isb + + /* invalidate all TLB entries for EL2 */ + tlbi alle2is + dsb ish + isb + + enable_mmu sctlr_el2, x8 + + /* Invalidate instruction cache */ + ic ialluis + dsb ish + isb + /* invalidate all TLB entries for EL2 */ + tlbi alle2is + dsb ish + isb + + ldp x29, x30, [sp], #16 + ret + +END_FUNC(el2_mmu_enable) .extern exception_handler .extern exception_register_state diff --git a/tool/microkit/Cargo.lock b/tool/microkit/Cargo.lock index 02863bce..4c2aef13 100644 --- a/tool/microkit/Cargo.lock +++ b/tool/microkit/Cargo.lock @@ -2,11 +2,37 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + [[package]] name = "microkit-tool" version = "0.1.0" dependencies = [ "roxmltree", + "serde", + "serde_json", +] + +[[package]] +name = "proc-macro2" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", ] [[package]] @@ -14,3 +40,57 @@ name = "roxmltree" version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3cd14fd5e3b777a7422cca79358c57a8f6e3a703d9ac187448d0daf220c2407f" + +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "serde" +version = "1.0.203" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.203" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "syn" +version = "2.0.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "901fa70d88b9d6c98022e23b4136f9f3e54e4662c3bc1bd1d84a42a9a0f0c1e9" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" diff --git a/tool/microkit/Cargo.toml b/tool/microkit/Cargo.toml index 99e82109..b0436e84 100644 --- a/tool/microkit/Cargo.toml +++ b/tool/microkit/Cargo.toml @@ -15,3 +15,5 @@ path = "src/main.rs" [dependencies] roxmltree = "0.19.0" +serde = "1.0.203" +serde_json = "1.0.117" diff --git a/tool/microkit/src/loader.rs b/tool/microkit/src/loader.rs index 693b9580..d51ad979 100644 --- a/tool/microkit/src/loader.rs +++ b/tool/microkit/src/loader.rs @@ -7,6 +7,7 @@ use crate::{MemoryRegion}; use crate::util::{round_up, mb, kb, mask, struct_to_bytes}; use crate::elf::{ElfFile}; +use crate::sel4::{Config}; use std::path::Path; use std::fs::File; use std::io::{BufWriter, Write}; @@ -85,7 +86,8 @@ pub struct Loader<'a> { } impl<'a> Loader<'a> { - pub fn new(loader_elf_path: &Path, + pub fn new(kernel_config: Config, + loader_elf_path: &Path, kernel_elf: &'a ElfFile, initial_task_elf: &'a ElfFile, initial_task_phys_base: Option, @@ -198,8 +200,10 @@ impl<'a> Loader<'a> { check_non_overlapping(&all_regions); - // FIXME: Should be a way to determine if seL4 needs hypervisor mode or not - let flags = 0; + let flags = match kernel_config.hypervisor { + true => 1, + false => 0, + }; let header = LoaderHeader64 { magic, @@ -285,11 +289,11 @@ impl<'a> Loader<'a> { boot_lvl1_lower[start..end].copy_from_slice(&pt_entry.to_le_bytes()); } - let mut boot_lvl0_upper: [u8; PAGE_TABLE_SIZE] = [0; PAGE_TABLE_SIZE]; + let boot_lvl0_upper: [u8; PAGE_TABLE_SIZE] = [0; PAGE_TABLE_SIZE]; { let pt_entry = (boot_lvl1_upper_addr | 3).to_le_bytes(); let idx = Aarch64::lvl0_index(first_vaddr); - boot_lvl0_upper[8 * idx..8 * (idx + 1)].copy_from_slice(&pt_entry); + boot_lvl0_lower[8 * idx..8 * (idx + 1)].copy_from_slice(&pt_entry); } let mut boot_lvl1_upper: [u8; PAGE_TABLE_SIZE] = [0; PAGE_TABLE_SIZE]; @@ -302,15 +306,14 @@ impl<'a> Loader<'a> { let mut boot_lvl2_upper: [u8; PAGE_TABLE_SIZE] = [0; PAGE_TABLE_SIZE]; let lvl2_idx = Aarch64::lvl2_index(first_vaddr); - let mut paddr = first_paddr; for i in lvl2_idx..512 { + let entry_idx = (i - Aarch64::lvl2_index(first_vaddr)) << AARCH64_2MB_BLOCK_BITS; let pt_entry: u64 = - paddr | + (entry_idx as u64 + first_paddr) | (1 << 10) | // Access flag (3 << 8) | // Make sure the shareability is the same as the kernel's (4 << 2) | // MT_NORMAL memory (1 << 0); // 2MB block - paddr += 1 << AARCH64_2MB_BLOCK_BITS; let start = 8 * i; let end = 8 * (i + 1); boot_lvl2_upper[start..end].copy_from_slice(&pt_entry.to_le_bytes()); diff --git a/tool/microkit/src/main.rs b/tool/microkit/src/main.rs index 7a2d3794..fd76cc19 100644 --- a/tool/microkit/src/main.rs +++ b/tool/microkit/src/main.rs @@ -17,9 +17,9 @@ use std::mem::size_of; use microkit_tool::{MAX_PDS, PD_MAX_NAME_LENGTH, sysxml, util, elf, sel4, loader, Region,UntypedObject,ObjectAllocator,MemoryRegion,DisjointMemoryRegion}; use elf::ElfFile; use sel4::{ArmVmAttributes, BootInfo, Object, Invocation, InvocationArgs, ObjectType, Rights, PageSize, Aarch64Regs, Config, Arch}; -use sysxml::{parse, PlatformDescription, SystemDescription, ProtectionDomain, SysMap, SysMapPerms, SysMemoryRegion}; +use sysxml::{parse, PlatformDescription, SystemDescription, ProtectionDomain, VirtualMachine, SysMap, SysMapPerms, SysMemoryRegion}; use loader::Loader; -use util::{bytes_to_struct, struct_to_bytes, comma_sep_usize, comma_sep_u64}; +use util::{json_str_as_u64, json_str_as_bool, bytes_to_struct, struct_to_bytes, comma_sep_usize, comma_sep_u64}; // Corresponds to the IPC buffer symbol in libmicrokit and the monitor const SYMBOL_IPC_BUFFER: &str = "__sel4_ipc_buffer_obj"; @@ -37,11 +37,13 @@ const MONITOR_EP_CAP_IDX: u64 = 5; const BASE_OUTPUT_NOTIFICATION_CAP: u64 = 10; const BASE_OUTPUT_ENDPOINT_CAP: u64 = BASE_OUTPUT_NOTIFICATION_CAP + 64; const BASE_IRQ_CAP: u64 = BASE_OUTPUT_ENDPOINT_CAP + 64; -const BASE_TCB_CAP: u64 = BASE_IRQ_CAP + 64; +const BASE_PD_TCB_CAP: u64 = BASE_IRQ_CAP + 64; +const BASE_VM_TCB_CAP: u64 = BASE_PD_TCB_CAP + 64; +const BASE_VCPU_CAP: u64 = BASE_VM_TCB_CAP + 64; const MAX_SYSTEM_INVOCATION_SIZE: u64 = util::mb(128); -const PD_CAP_SIZE: u64 = 256; +const PD_CAP_SIZE: u64 = 512; const PD_CAP_BITS: u64 = PD_CAP_SIZE.ilog2() as u64; const PD_SCHEDCONTEXT_SIZE: u64 = 1 << 8; @@ -560,9 +562,9 @@ fn get_arch_n_paging(region: MemoryRegion) -> u64 { const PT_INDEX_OFFSET: u64 = 12; const PD_INDEX_OFFSET: u64 = PT_INDEX_OFFSET + 9; const PUD_INDEX_OFFSET: u64 = PD_INDEX_OFFSET + 9; - const PGD_INDEX_OFFSET: u64 = PUD_INDEX_OFFSET + 9; + // const PGD_INDEX_OFFSET: u64 = PUD_INDEX_OFFSET + 9; - get_n_paging(region, PGD_INDEX_OFFSET) + + // get_n_paging(region, PGD_INDEX_OFFSET) + get_n_paging(region, PUD_INDEX_OFFSET) + get_n_paging(region, PD_INDEX_OFFSET) } @@ -570,7 +572,7 @@ fn get_arch_n_paging(region: MemoryRegion) -> u64 { fn rootserver_max_size_bits() -> u64 { let slot_bits = 5; // seL4_SlotBits let root_cnode_bits = 12; // CONFIG_ROOT_CNODE_SIZE_BITS - let vspace_bits = 12; // seL4_VSpaceBits + let vspace_bits = 13; // seL4_VSpaceBits let cnode_size_bits = root_cnode_bits + slot_bits; max(cnode_size_bits, vspace_bits) @@ -586,7 +588,7 @@ fn calculate_rootserver_size(initial_task_region: MemoryRegion) -> u64 { let tcb_bits = 11; // seL4_TCBBits let page_bits = 12; // seL4_PageBits let asid_pool_bits = 12; // seL4_ASIDPoolBits - let vspace_bits = 12; // seL4_VSpaceBits + let vspace_bits = 13; // seL4_VSpaceBits let page_table_bits = 12; // seL4_PageTableBits let min_sched_context_bits = 7; // seL4_MinSchedContextBits @@ -1190,14 +1192,39 @@ fn build_system(kernel_config: &Config, mr_pages.get_mut(mr).unwrap().push(page[0].clone()); } - let tcb_names: Vec = system.protection_domains.iter().map(|pd| format!("TCB: PD={}", pd.name)).collect(); + let virtual_machines: Vec<&VirtualMachine> = system + .protection_domains + .iter() + .filter_map(|pd| match &pd.virtual_machine { + Some(vm) => Some(vm), + None => None, + }) + .collect(); + + // TCBs + let mut tcb_names: Vec = system.protection_domains.iter().map(|pd| format!("TCB: PD={}", pd.name)).collect(); + let vm_tcb_names: Vec = virtual_machines.iter().map(|vm| format!("TCB: VM={}", vm.name)).collect(); + tcb_names.extend(vm_tcb_names); let tcb_objs = init_system.allocate_objects(ObjectType::Tcb, tcb_names, None); let tcb_caps: Vec = tcb_objs.iter().map(|tcb| tcb.cap_addr).collect(); - let sched_context_names = system.protection_domains.iter().map(|pd| format!("SchedContext: PD={}", pd.name)).collect(); + let pd_tcb_objs = &tcb_objs[..system.protection_domains.len()]; + let vm_tcb_objs = &tcb_objs[system.protection_domains.len()..]; + assert!(pd_tcb_objs.len() + vm_tcb_objs.len() == tcb_objs.len()); + // VCPUs + let vcpu_names: Vec = virtual_machines.iter().map(|vm| format!("VCPU: VM={}", vm.name)).collect(); + let vcpu_objs = init_system.allocate_objects(ObjectType::Vcpu, vcpu_names, None); + // Scheduling Contexts + let mut sched_context_names: Vec = system.protection_domains.iter().map(|pd| format!("SchedContext: PD={}", pd.name)).collect(); + let vm_sched_context_names: Vec = virtual_machines.iter().map(|vm| format!("SchedContext: VM={}", vm.name)).collect(); + sched_context_names.extend(vm_sched_context_names); let sched_context_objs = init_system.allocate_objects(ObjectType::SchedContext, sched_context_names, Some(PD_SCHEDCONTEXT_SIZE)); let sched_context_caps: Vec = sched_context_objs.iter().map(|sc| sc.cap_addr).collect(); + let pd_sched_context_objs = &sched_context_objs[..system.protection_domains.len()]; + let vm_sched_context_objs = &sched_context_objs[system.protection_domains.len()..]; + + // Endpoints let pd_endpoint_names: Vec = system .protection_domains .iter() @@ -1205,7 +1232,7 @@ fn build_system(kernel_config: &Config, .map(|pd| format!("EP: PD={}", pd.name)) .collect(); let endpoint_names = [vec![format!("EP: Monitor Fault")], pd_endpoint_names].concat(); - + // Reply objects let pd_reply_names: Vec = system.protection_domains.iter().map(|pd| format!("Reply: PD={}", pd.name)).collect(); let reply_names = [vec![format!("Reply: Monitor")], pd_reply_names].concat(); let reply_objs = init_system.allocate_objects(ObjectType::Reply, reply_names, None); @@ -1242,9 +1269,9 @@ fn build_system(kernel_config: &Config, // Page table (level 3 table) is based on how many 2 MiB parts of the // address space is covered (excluding any 2MiB regions covered by large // pages). - let mut uds: Vec<(usize, u64)> = Vec::new(); - let mut ds: Vec<(usize, u64)> = Vec::new(); - let mut pts: Vec<(usize, u64)> = Vec::new(); + let mut all_pd_uds: Vec<(usize, u64)> = Vec::new(); + let mut all_pd_ds: Vec<(usize, u64)> = Vec::new(); + let mut all_pd_pts: Vec<(usize, u64)> = Vec::new(); for (pd_idx, pd) in system.protection_domains.iter().enumerate() { let (ipc_buffer_vaddr, _) = pd_elf_files[pd_idx].find_symbol(SYMBOL_IPC_BUFFER) .unwrap_or_else(|_| panic!("Could not find {}", SYMBOL_IPC_BUFFER)); @@ -1268,7 +1295,9 @@ fn build_system(kernel_config: &Config, } for (vaddr, page_size) in vaddrs { - upper_directory_vaddrs.insert(util::mask_bits(vaddr, 12 + 9 + 9 + 9)); + if !kernel_config.hypervisor && kernel_config.arm_pa_size_bits != 40 { + upper_directory_vaddrs.insert(util::mask_bits(vaddr, 12 + 9 + 9 + 9)); + } directory_vaddrs.insert(util::mask_bits(vaddr, 12 + 9 + 9)); if page_size == PageSize::Small { page_table_vaddrs.insert(util::mask_bits(vaddr, 12 + 9)); @@ -1277,37 +1306,100 @@ fn build_system(kernel_config: &Config, let mut pd_uds: Vec<(usize, u64)> = upper_directory_vaddrs.into_iter().map(|vaddr| (pd_idx, vaddr)).collect(); pd_uds.sort_by_key(|ud| ud.1); - uds.extend(pd_uds); + all_pd_uds.extend(pd_uds); let mut pd_ds: Vec<(usize, u64)> = directory_vaddrs.into_iter().map(|vaddr| (pd_idx, vaddr)).collect(); pd_ds.sort_by_key(|d| d.1); - ds.extend(pd_ds); + all_pd_ds.extend(pd_ds); let mut pd_pts: Vec<(usize, u64)> = page_table_vaddrs.into_iter().map(|vaddr| (pd_idx, vaddr)).collect(); pd_pts.sort_by_key(|pt| pt.1); - pts.extend(pd_pts); + all_pd_pts.extend(pd_pts); } + all_pd_uds.sort_by_key(|ud| ud.0); + all_pd_ds.sort_by_key(|d| d.0); + all_pd_pts.sort_by_key(|pt| pt.0); + + let mut all_vm_uds: Vec<(usize, u64)> = Vec::new(); + let mut all_vm_ds: Vec<(usize, u64)> = Vec::new(); + let mut all_vm_pts: Vec<(usize, u64)> = Vec::new(); + for (vm_idx, vm) in virtual_machines.iter().enumerate() { + let mut upper_directory_vaddrs = HashSet::new(); + let mut directory_vaddrs = HashSet::new(); + let mut page_table_vaddrs = HashSet::new(); + + let mut vaddrs = vec![]; + for map in &vm.maps { + let mr = all_mr_by_name[map.mr.as_str()]; + let mut vaddr = map.vaddr; + for _ in 0..mr.page_count { + vaddrs.push((vaddr, mr.page_size)); + vaddr += mr.page_bytes(); + } + } + + for (vaddr, page_size) in vaddrs { + assert!(kernel_config.hypervisor); + if kernel_config.arm_pa_size_bits != 40 { + upper_directory_vaddrs.insert(util::mask_bits(vaddr, 12 + 9 + 9 + 9)); + } + directory_vaddrs.insert(util::mask_bits(vaddr, 12 + 9 + 9)); + if page_size == PageSize::Small { + page_table_vaddrs.insert(util::mask_bits(vaddr, 12 + 9)); + } + } + + let mut vm_uds: Vec<(usize, u64)> = upper_directory_vaddrs.into_iter().map(|vaddr| (vm_idx, vaddr)).collect(); + vm_uds.sort_by_key(|ud| ud.1); + all_vm_uds.extend(vm_uds); - uds.sort_by_key(|ud| ud.0); - ds.sort_by_key(|d| d.0); - pts.sort_by_key(|pt| pt.0); + let mut vm_ds: Vec<(usize, u64)> = directory_vaddrs.into_iter().map(|vaddr| (vm_idx, vaddr)).collect(); + vm_ds.sort_by_key(|d| d.1); + all_vm_ds.extend(vm_ds); + + let mut vm_pts: Vec<(usize, u64)> = page_table_vaddrs.into_iter().map(|vaddr| (vm_idx, vaddr)).collect(); + vm_pts.sort_by_key(|pt| pt.1); + all_vm_pts.extend(vm_pts); + } + all_vm_uds.sort_by_key(|ud| ud.0); + all_vm_ds.sort_by_key(|d| d.0); + all_vm_pts.sort_by_key(|pt| pt.0); let pd_names: Vec<&str> = system.protection_domains.iter().map(|pd| pd.name.as_str()).collect(); + let vm_names: Vec<&str> = virtual_machines.iter().map(|vm| vm.name.as_str()).collect(); - let vspace_names: Vec = system.protection_domains.iter().map(|pd| format!("VSpace: PD={}", pd.name)).collect(); + let mut vspace_names: Vec = system.protection_domains.iter().map(|pd| format!("VSpace: PD={}", pd.name)).collect(); + let vm_vspace_names: Vec = virtual_machines.iter().map(|vm| format!("VSpace: VM={}", vm.name)).collect(); + vspace_names.extend(vm_vspace_names); let vspace_objs = init_system.allocate_objects(ObjectType::VSpace, vspace_names, None); + let pd_vspace_objs = &vspace_objs[..system.protection_domains.len()]; + let vm_vspace_objs = &vspace_objs[system.protection_domains.len()..]; + + let pd_ud_names: Vec = all_pd_uds.iter().map(|(pd_idx, vaddr)| format!("PageTable: PD={} VADDR=0x{:x}", pd_names[*pd_idx], vaddr)).collect(); + let vm_ud_names: Vec = all_vm_uds.iter().map(|(vm_idx, vaddr)| format!("PageTable: VM={} VADDR=0x{:x}", vm_names[*vm_idx], vaddr)).collect(); - let ud_names = uds.iter().map(|(pd_idx, vaddr)| format!("PageTable: PD={} VADDR=0x{:x}", pd_names[*pd_idx], vaddr)).collect(); - let ud_objs = init_system.allocate_objects(ObjectType::PageTable, ud_names, None); + let pd_ud_objs = init_system.allocate_objects(ObjectType::PageTable, pd_ud_names, None); + let vm_ud_objs = init_system.allocate_objects(ObjectType::PageTable, vm_ud_names, None); + + if kernel_config.hypervisor { + assert!(vm_ud_objs.is_empty()); + } - let d_names = ds.iter().map(|(pd_idx, vaddr)| format!("PageTable: PD={} VADDR=0x{:x}", pd_names[*pd_idx], vaddr)).collect(); - let d_objs = init_system.allocate_objects(ObjectType::PageTable, d_names, None); + let pd_d_names: Vec = all_pd_ds.iter().map(|(pd_idx, vaddr)| format!("PageTable: PD={} VADDR=0x{:x}", pd_names[*pd_idx], vaddr)).collect(); + let vm_d_names: Vec = all_vm_ds.iter().map(|(vm_idx, vaddr)| format!("PageTable: VM={} VADDR=0x{:x}", vm_names[*vm_idx], vaddr)).collect(); + let pd_d_objs = init_system.allocate_objects(ObjectType::PageTable, pd_d_names, None); + let vm_d_objs = init_system.allocate_objects(ObjectType::PageTable, vm_d_names, None); - let pt_names = pts.iter().map(|(pd_idx, vaddr)| format!("PageTable: PD={} VADDR=0x{:x}", pd_names[*pd_idx], vaddr)).collect(); - let pt_objs = init_system.allocate_objects(ObjectType::PageTable, pt_names, None); + let pd_pt_names: Vec = all_pd_pts.iter().map(|(pd_idx, vaddr)| format!("PageTable: PD={} VADDR=0x{:x}", pd_names[*pd_idx], vaddr)).collect(); + let vm_pt_names: Vec = all_vm_pts.iter().map(|(vm_idx, vaddr)| format!("PageTable: VM={} VADDR=0x{:x}", vm_names[*vm_idx], vaddr)).collect(); + let pd_pt_objs = init_system.allocate_objects(ObjectType::PageTable, pd_pt_names, None); + let vm_pt_objs = init_system.allocate_objects(ObjectType::PageTable, vm_pt_names, None); // Create CNodes - all CNode objects are the same size: 128 slots. - let cnode_names: Vec = system.protection_domains.iter().map(|pd| format!("CNode: PD={}", pd.name)).collect(); + let mut cnode_names: Vec = system.protection_domains.iter().map(|pd| format!("CNode: PD={}", pd.name)).collect(); + let vm_cnode_names: Vec = virtual_machines.iter().map(|vm| format!("CNode: VM={}", vm.name)).collect(); + cnode_names.extend(vm_cnode_names); + let cnode_objs = init_system.allocate_objects(ObjectType::CNode, cnode_names, Some(PD_CAP_SIZE)); let mut cnode_objs_by_pd: HashMap<&ProtectionDomain, &Object> = HashMap::with_capacity(system.protection_domains.len()); for (i, pd) in system.protection_domains.iter().enumerate() { @@ -1340,11 +1432,12 @@ fn build_system(kernel_config: &Config, } // This has to be done prior to minting! + let num_asid_invocations = system.protection_domains.len() + virtual_machines.len(); let mut asid_invocation = Invocation::new(InvocationArgs::AsidPoolAssign { asid_pool: INIT_ASID_POOL_CAP_ADDRESS, vspace: vspace_objs[0].cap_addr, }); - asid_invocation.repeat(system.protection_domains.len() as u32, InvocationArgs::AsidPoolAssign{ + asid_invocation.repeat(num_asid_invocations as u32, InvocationArgs::AsidPoolAssign{ asid_pool: 0, vspace: 1, }); @@ -1354,7 +1447,7 @@ fn build_system(kernel_config: &Config, // Mint copies of required pages, while also determing what's required // for later mapping - let mut page_descriptors = Vec::new(); + let mut pd_page_descriptors = Vec::new(); for (pd_idx, pd) in system.protection_domains.iter().enumerate() { for map_set in [&pd.maps, &pd_extra_maps[pd]] { for mp in map_set { @@ -1399,7 +1492,7 @@ fn build_system(kernel_config: &Config, }); system_invocations.push(invocation); - page_descriptors.push(( + pd_page_descriptors.push(( system_cap_address_mask | cap_slot, pd_idx, mp.vaddr, @@ -1421,6 +1514,71 @@ fn build_system(kernel_config: &Config, } } + let mut vm_page_descriptors = Vec::new(); + for (vm_idx, vm) in virtual_machines.iter().enumerate() { + for mp in &vm.maps { + let mr = all_mr_by_name[mp.mr.as_str()]; + let mut rights: u64 = Rights::None as u64; + let mut attrs = ArmVmAttributes::ParityEnabled as u64; + if mp.perms & SysMapPerms::Read as u8 != 0 { + rights |= Rights::Read as u64; + } + if mp.perms & SysMapPerms::Write as u8 != 0 { + rights |= Rights::Write as u64; + } + if mp.perms & SysMapPerms::Execute as u8 == 0 { + attrs |= ArmVmAttributes::ExecuteNever as u64; + } + if mp.cached { + attrs |= ArmVmAttributes::Cacheable as u64; + } + + assert!(!mr_pages[mr].is_empty()); + assert!(util::objects_adjacent(&mr_pages[mr])); + + let mut invocation = Invocation::new(InvocationArgs::CnodeMint{ + cnode: system_cnode_cap, + dest_index: cap_slot, + dest_depth: system_cnode_bits, + src_root: root_cnode_cap, + src_obj: mr_pages[mr][0].cap_addr, + src_depth: kernel_config.cap_address_bits, + rights, + badge: 0, + }); + invocation.repeat(mr_pages[mr].len() as u32, InvocationArgs::CnodeMint{ + cnode: 0, + dest_index: 1, + dest_depth: 0, + src_root: 0, + src_obj: 1, + src_depth: 0, + rights: 0, + badge: 0, + }); + system_invocations.push(invocation); + + vm_page_descriptors.push(( + system_cap_address_mask | cap_slot, + vm_idx, + mp.vaddr, + rights, + attrs, + mr_pages[mr].len() as u64, + mr.page_bytes() + )); + + for idx in 0..mr_pages[mr].len() { + cap_address_names.insert( + system_cap_address_mask | (cap_slot + idx as u64), + format!("{} (derived)", cap_address_names.get(&(mr_pages[mr][0].cap_addr + idx as u64)).unwrap()) + ); + } + + cap_slot += mr_pages[mr].len() as u64; + } + } + let mut badged_irq_caps: HashMap<&ProtectionDomain, Vec> = HashMap::new(); for (notification_obj, pd) in zip(¬ification_objs, &system.protection_domains) { badged_irq_caps.insert(pd, vec![]); @@ -1476,6 +1634,37 @@ fn build_system(kernel_config: &Config, cap_slot += 1; } + // Create a fault endpoint cap for each virtual machine. + // This will be the endpoint for the parent protection domain of the virtual machine. + for vm in &virtual_machines { + let mut parent_pd = None; + for (pd_idx, pd) in system.protection_domains.iter().enumerate() { + if let Some(virtual_machine) = &pd.virtual_machine { + if virtual_machine == *vm { + parent_pd = Some(pd_idx); + break; + } + } + } + assert!(parent_pd.is_some()); + + let fault_ep_cap = pd_endpoint_objs[parent_pd.unwrap()].unwrap().cap_addr; + let badge = FAULT_BADGE | vm.vcpu.id; + + let invocation = Invocation::new(InvocationArgs::CnodeMint{ + cnode: system_cnode_cap, + dest_index: cap_slot, + dest_depth: system_cnode_bits, + src_root: root_cnode_cap, + src_obj: fault_ep_cap, + src_depth: kernel_config.cap_address_bits, + rights: Rights::All as u64, + badge, + }); + system_invocations.push(invocation); + cap_slot += 1; + } + let final_cap_slot = cap_slot; // Minting in the address space @@ -1525,6 +1714,7 @@ fn build_system(kernel_config: &Config, // Mint access to the VSpace cap assert!(VSPACE_CAP_IDX < PD_CAP_SIZE); + let num_vspace_mint_invocations = system.protection_domains.len() + virtual_machines.len(); let mut vspace_mint_invocation = Invocation::new(InvocationArgs::CnodeMint { cnode: cnode_objs[0].cap_addr, dest_index: VSPACE_CAP_IDX, @@ -1535,7 +1725,7 @@ fn build_system(kernel_config: &Config, rights: Rights::All as u64, badge: 0, }); - vspace_mint_invocation.repeat(system.protection_domains.len() as u32, InvocationArgs::CnodeMint { + vspace_mint_invocation.repeat(num_vspace_mint_invocations as u32, InvocationArgs::CnodeMint { cnode: 1, dest_index: 0, dest_depth: 0, @@ -1573,7 +1763,8 @@ fn build_system(kernel_config: &Config, // We are dealing with a child PD, now check if the index of its parent // matches this iteration's PD. if parent_idx == pd_idx { - let cap_idx = BASE_TCB_CAP + maybe_child_pd.id.unwrap(); + let cap_idx = BASE_PD_TCB_CAP + maybe_child_pd.id.unwrap(); + assert!(cap_idx < PD_CAP_SIZE); system_invocations.push(Invocation::new(InvocationArgs::CnodeMint { cnode: cnode_objs[pd_idx].cap_addr, dest_index: cap_idx, @@ -1589,6 +1780,48 @@ fn build_system(kernel_config: &Config, } } + // Mint access to virtual machine TCBs in the CSpace of parent PDs + for (pd_idx, pd) in system.protection_domains.iter().enumerate() { + if let Some(vm) = &pd.virtual_machine { + // This PD that we are dealing with has a virtual machine, now we + // need to find the TCB that corresponds to it. + let vm_idx = virtual_machines.iter().position(|&x| x == vm).unwrap(); + let cap_idx = BASE_VM_TCB_CAP + vm.vcpu.id; + assert!(cap_idx < PD_CAP_SIZE); + system_invocations.push(Invocation::new(InvocationArgs::CnodeMint { + cnode: cnode_objs[pd_idx].cap_addr, + dest_index: cap_idx, + dest_depth: PD_CAP_BITS, + src_root: root_cnode_cap, + src_obj: vm_tcb_objs[vm_idx].cap_addr, + src_depth: kernel_config.cap_address_bits, + rights: Rights::All as u64, + badge: 0, + })); + } + } + + // Mint access to virtual machine vCPUs in the CSpace of the parent PDs + for (pd_idx, pd) in system.protection_domains.iter().enumerate() { + if let Some(vm) = &pd.virtual_machine { + // This PD that we are dealing with has a virtual machine, now we + // need to find the vCPU that corresponds to it. + let vm_idx = virtual_machines.iter().position(|&x| x == vm).unwrap(); + let cap_idx = BASE_VCPU_CAP + vm.vcpu.id; + assert!(cap_idx < PD_CAP_SIZE); + system_invocations.push(Invocation::new(InvocationArgs::CnodeMint { + cnode: cnode_objs[pd_idx].cap_addr, + dest_index: cap_idx, + dest_depth: PD_CAP_BITS, + src_root: root_cnode_cap, + src_obj: vcpu_objs[vm_idx].cap_addr, + src_depth: kernel_config.cap_address_bits, + rights: Rights::All as u64, + badge: 0, + })); + } + } + for cc in &system.channels { let pd_a = &system.protection_domains[cc.pd_a]; let pd_b = &system.protection_domains[cc.pd_b]; @@ -1696,11 +1929,31 @@ fn build_system(kernel_config: &Config, } // Initialise the VSpaces -- assign them all the the initial asid pool. - for (descriptors, objects) in [(uds, ud_objs), (ds, d_objs), (pts, pt_objs)] { + let pd_vspace_invocations = if kernel_config.hypervisor && kernel_config.arm_pa_size_bits == 40 { + vec![(all_pd_ds, pd_d_objs), (all_pd_pts, pd_pt_objs)] + } else { + vec![(all_pd_uds, pd_ud_objs), (all_pd_ds, pd_d_objs), (all_pd_pts, pd_pt_objs)] + }; + for (descriptors, objects) in pd_vspace_invocations { for ((pd_idx, vaddr), obj) in zip(descriptors, objects) { system_invocations.push(Invocation::new(InvocationArgs::PageTableMap{ page_table: obj.cap_addr, - vspace: vspace_objs[pd_idx].cap_addr, + vspace: pd_vspace_objs[pd_idx].cap_addr, + vaddr, + attr: ArmVmAttributes::default(), + })); + } + } + let vm_vspace_invocations = if kernel_config.hypervisor && kernel_config.arm_pa_size_bits == 40 { + vec![(all_vm_ds, vm_d_objs), (all_vm_pts, vm_pt_objs)] + } else { + vec![(all_vm_uds, vm_ud_objs), (all_vm_ds, vm_d_objs), (all_vm_pts, vm_pt_objs)] + }; + for (descriptors, objects) in vm_vspace_invocations { + for ((vm_idx, vaddr), obj) in zip(descriptors, objects) { + system_invocations.push(Invocation::new(InvocationArgs::PageTableMap{ + page_table: obj.cap_addr, + vspace: vm_vspace_objs[vm_idx].cap_addr, vaddr, attr: ArmVmAttributes::default(), })); @@ -1708,10 +1961,27 @@ fn build_system(kernel_config: &Config, } // Now map all the pages - for (page_cap_address, pd_idx, vaddr, rights, attr, count, vaddr_incr) in page_descriptors { + for (page_cap_address, pd_idx, vaddr, rights, attr, count, vaddr_incr) in pd_page_descriptors { + let mut invocation = Invocation::new(InvocationArgs::PageMap { + page: page_cap_address, + vspace: pd_vspace_objs[pd_idx].cap_addr, + vaddr, + rights, + attr, + }); + invocation.repeat(count as u32, InvocationArgs::PageMap { + page: 1, + vspace: 0, + vaddr: vaddr_incr, + rights: 0, + attr: 0, + }); + system_invocations.push(invocation); + } + for (page_cap_address, vm_idx, vaddr, rights, attr, count, vaddr_incr) in vm_page_descriptors { let mut invocation = Invocation::new(InvocationArgs::PageMap { page: page_cap_address, - vspace: vspace_objs[pd_idx].cap_addr, + vspace: vm_vspace_objs[vm_idx].cap_addr, vaddr, rights, attr, @@ -1732,7 +2002,7 @@ fn build_system(kernel_config: &Config, .unwrap_or_else(|_| panic!("Could not find {}", SYMBOL_IPC_BUFFER)); system_invocations.push(Invocation::new(InvocationArgs::PageMap { page: ipc_buffer_objs[pd_idx].cap_addr, - vspace: vspace_objs[pd_idx].cap_addr, + vspace: pd_vspace_objs[pd_idx].cap_addr, vaddr, rights: Rights::Read as u64 | Rights::Write as u64, attr: ArmVmAttributes::default() | ArmVmAttributes::ExecuteNever as u64, @@ -1745,7 +2015,7 @@ fn build_system(kernel_config: &Config, for (pd_idx, pd) in system.protection_domains.iter().enumerate() { system_invocations.push(Invocation::new(InvocationArgs::SchedControlConfigureFlags { sched_control: kernel_boot_info.sched_control_cap, - sched_context: sched_context_objs[pd_idx].cap_addr, + sched_context: pd_sched_context_objs[pd_idx].cap_addr, budget: pd.budget, period: pd.period, extra_refills: 0, @@ -1753,19 +2023,43 @@ fn build_system(kernel_config: &Config, flags: 0, })); } + for (vm_idx, vm) in virtual_machines.iter().enumerate() { + system_invocations.push(Invocation::new(InvocationArgs::SchedControlConfigureFlags { + sched_control: kernel_boot_info.sched_control_cap, + sched_context: vm_sched_context_objs[vm_idx].cap_addr, + budget: vm.budget, + period: vm.period, + extra_refills: 0, + badge: 0x100 + vm_idx as u64, + flags: 0, + })); + } for (pd_idx, pd) in system.protection_domains.iter().enumerate() { system_invocations.push(Invocation::new(InvocationArgs::TcbSetSchedParams { - tcb: tcb_objs[pd_idx].cap_addr, + tcb: pd_tcb_objs[pd_idx].cap_addr, authority: INIT_TCB_CAP_ADDRESS, mcp: pd.priority as u64, priority: pd.priority as u64, - sched_context: sched_context_objs[pd_idx].cap_addr, + sched_context: pd_sched_context_objs[pd_idx].cap_addr, + // This gets over-written by the call to TCB_SetSpace + fault_ep: fault_ep_endpoint_object.cap_addr, + })); + } + for (vm_idx, vm) in virtual_machines.iter().enumerate() { + system_invocations.push(Invocation::new(InvocationArgs::TcbSetSchedParams { + tcb: vm_tcb_objs[vm_idx].cap_addr, + authority: INIT_TCB_CAP_ADDRESS, + mcp: vm.priority as u64, + priority: vm.priority as u64, + sched_context: vm_sched_context_objs[vm_idx].cap_addr, + // This gets over-written by the call to TCB_SetSpace fault_ep: fault_ep_endpoint_object.cap_addr, })); } // Set VSpace and CSpace + let num_set_space_invocations = system.protection_domains.len() + virtual_machines.len(); let mut set_space_invocation = Invocation::new(InvocationArgs::TcbSetSpace { tcb: tcb_objs[0].cap_addr, fault_ep: badged_fault_ep, @@ -1774,7 +2068,7 @@ fn build_system(kernel_config: &Config, vspace_root: vspace_objs[0].cap_addr, vspace_root_data: 0, }); - set_space_invocation.repeat(system.protection_domains.len() as u32, InvocationArgs::TcbSetSpace { + set_space_invocation.repeat(num_set_space_invocations as u32, InvocationArgs::TcbSetSpace { tcb: 1, fault_ep: 1, cspace_root: 1, @@ -1824,7 +2118,20 @@ fn build_system(kernel_config: &Config, }); system_invocations.push(bind_ntfn_invocation); - // Resume (start) all the threads + // Bind virtual machine TCBs to vCPUs + if !virtual_machines.is_empty() { + let mut vcpu_bind_invocation = Invocation::new(InvocationArgs::ArmVcpuSetTcb{ + vcpu: vcpu_objs[0].cap_addr, + tcb: vm_tcb_objs[0].cap_addr, + }); + vcpu_bind_invocation.repeat(virtual_machines.len() as u32, InvocationArgs::ArmVcpuSetTcb{ + vcpu: 1, + tcb: 1, + }); + system_invocations.push(vcpu_bind_invocation); + } + + // Resume (start) all the threads that belong to PDs (VMs are not started upon system init) let mut resume_invocation = Invocation::new(InvocationArgs::TcbResume{ tcb: tcb_objs[0].cap_addr, }); @@ -1889,7 +2196,7 @@ fn build_system(kernel_config: &Config, fault_ep_cap_address: fault_ep_endpoint_object.cap_addr, reply_cap_address: reply_obj.cap_addr, cap_lookup: cap_address_names, - tcb_caps, + tcb_caps: tcb_caps[system.protection_domains.len()..].to_vec(), sched_caps: sched_context_caps, ntfn_caps: notification_caps, pd_elf_regions, @@ -2148,6 +2455,8 @@ fn main() -> Result<(), String> { let kernel_elf_path = elf_path.join("sel4.elf"); let monitor_elf_path = elf_path.join("monitor.elf"); + let kernel_config_path = sdk_dir.join("board").join(args.board).join(args.config).join("include/kernel/gen_config.json"); + if !elf_path.exists() { eprintln!("Error: board ELF directory '{}' does not exist", elf_path.display()); std::process::exit(1); @@ -2164,6 +2473,10 @@ fn main() -> Result<(), String> { eprintln!("Error: monitor ELF '{}' does not exist", monitor_elf_path.display()); std::process::exit(1); } + if !kernel_config_path.exists() { + eprintln!("Error: kernel configuration file '{}' does not exist", kernel_config_path.display()); + std::process::exit(1); + } let system_path = Path::new(args.system); if !system_path.exists() { @@ -2173,17 +2486,28 @@ fn main() -> Result<(), String> { let xml: String = fs::read_to_string(args.system).unwrap(); + let kernel_config_json: serde_json::Value = serde_json::from_str(&fs::read_to_string(kernel_config_path).unwrap()).unwrap(); + let kernel_config = Config { arch: Arch::Aarch64, - word_size: 64, + word_size: json_str_as_u64(&kernel_config_json, "WORD_SIZE")?, minimum_page_size: 4096, - paddr_user_device_top: 1 << 40, + paddr_user_device_top: json_str_as_u64(&kernel_config_json, "PADDR_USER_DEVICE_TOP")?, kernel_frame_size: 1 << 12, - init_cnode_bits: 12, + init_cnode_bits: json_str_as_u64(&kernel_config_json, "ROOT_CNODE_SIZE_BITS")?, cap_address_bits: 64, - fan_out_limit: 256, + fan_out_limit: json_str_as_u64(&kernel_config_json, "RETYPE_FAN_OUT_LIMIT")?, + arm_pa_size_bits: 40, + hypervisor: json_str_as_bool(&kernel_config_json, "ARM_HYPERVISOR_SUPPORT")?, }; + match kernel_config.arch { + Arch::Aarch64 => assert!(kernel_config.hypervisor, "Microkit tool expects a kernel with hypervisor mode enabled on AArch64.") + } + + assert!(kernel_config.word_size == 64, "Microkit tool has various assumptions about the word size being 64-bits."); + assert!(kernel_config.arm_pa_size_bits == 40, "Microkit tool has assumptions about the ARM physical address size bits"); + let plat_desc = PlatformDescription::new(&kernel_config); let system = match parse(args.system, &xml, &plat_desc) { Ok(system) => system, @@ -2370,6 +2694,7 @@ fn main() -> Result<(), String> { } let loader = Loader::new( + kernel_config, Path::new(&loader_elf_path), &kernel_elf, &monitor_elf, diff --git a/tool/microkit/src/sel4.rs b/tool/microkit/src/sel4.rs index 149939aa..f3bfd085 100644 --- a/tool/microkit/src/sel4.rs +++ b/tool/microkit/src/sel4.rs @@ -39,13 +39,15 @@ pub struct Object { pub struct Config { pub arch: Arch, - pub word_size: usize, + pub word_size: u64, pub minimum_page_size: u64, pub paddr_user_device_top: u64, pub kernel_frame_size: u64, pub init_cnode_bits: u64, pub cap_address_bits: u64, pub fan_out_limit: u64, + pub hypervisor: bool, + pub arm_pa_size_bits: usize, } pub enum Arch { @@ -68,6 +70,7 @@ pub enum ObjectType { SmallPage = 9, LargePage = 10, PageTable = 11, + Vcpu = 12, } impl ObjectType { @@ -79,8 +82,10 @@ impl ObjectType { ObjectType::Reply => Some(OBJECT_SIZE_REPLY), ObjectType::VSpace => Some(OBJECT_SIZE_VSPACE), ObjectType::PageTable => Some(OBJECT_SIZE_PAGE_TABLE), + ObjectType::HugePage => Some(OBJECT_SIZE_HUGE_PAGE), ObjectType::LargePage => Some(OBJECT_SIZE_LARGE_PAGE), ObjectType::SmallPage => Some(OBJECT_SIZE_SMALL_PAGE), + ObjectType::Vcpu => Some(OBJECT_SIZE_VCPU), _ => None } } @@ -99,6 +104,7 @@ impl ObjectType { ObjectType::SmallPage => "SEL4_SMALL_PAGE_OBJECT", ObjectType::LargePage => "SEL4_LARGE_PAGE_OBJECT", ObjectType::PageTable => "SEL4_PAGE_TABLE_OBJECT", + ObjectType::Vcpu => "SEL4_VCPU_OBJECT", } } @@ -134,9 +140,11 @@ pub const OBJECT_SIZE_ENDPOINT: u64 = 1 << 4; pub const OBJECT_SIZE_NOTIFICATION: u64 = 1 << 6; pub const OBJECT_SIZE_REPLY: u64 = 1 << 5; pub const OBJECT_SIZE_PAGE_TABLE: u64 = 1 << 12; -pub const OBJECT_SIZE_LARGE_PAGE: u64 = 2 * 1024 * 1024; -pub const OBJECT_SIZE_SMALL_PAGE: u64 = 4 * 1024; -pub const OBJECT_SIZE_VSPACE: u64 = 4 * 1024; +pub const OBJECT_SIZE_HUGE_PAGE: u64 = 1 << 30; +pub const OBJECT_SIZE_LARGE_PAGE: u64 = 1 << 21; +pub const OBJECT_SIZE_SMALL_PAGE: u64 = 1 << 12; +pub const OBJECT_SIZE_VSPACE: u64 = 1 << 13; +pub const OBJECT_SIZE_VCPU: u64 = 1 << 12; // pub const OBJECT_SIZE_ASID_POOL: u64 = 1 << 12; /// Virtual memory attributes for ARM @@ -241,8 +249,14 @@ enum InvocationLabel { // ARM Asid ArmAsidControlMakePool = 50, ArmAsidPoolAssign = 51, + // ARM vCPU + ArmVcpuSetTcb = 52, + ArmVcpuInjectIrq = 53, + ArmVcpuReadReg = 54, + ArmVcpuWriteReg = 55, + ArmVcpuAckVppi = 56, // ARM IRQ - ArmIrqIssueIrqHandlerTrigger = 52, + ArmIrqIssueIrqHandlerTrigger = 57, } #[derive(Copy, Clone, Default)] @@ -588,6 +602,10 @@ impl Invocation { arg_strs.push(Invocation::fmt_field("badge", badge)); arg_strs.push(Invocation::fmt_field("flags", flags)); (sched_control, "None") + }, + InvocationArgs::ArmVcpuSetTcb { vcpu, tcb } => { + arg_strs.push(Invocation::fmt_field_cap("tcb", tcb, cap_lookup)); + (vcpu, cap_lookup.get(&vcpu).unwrap().as_str()) } }; _ = writeln!(f, "{:<20} - {:<17} - 0x{:016x} ({})\n{}", self.object_type(), self.method_name(), service, service_str, arg_strs.join("\n")); @@ -612,6 +630,7 @@ impl Invocation { InvocationLabel::ArmPageMap => "Page", InvocationLabel::CnodeMint => "CNode", InvocationLabel::SchedControlConfigureFlags => "SchedControl", + InvocationLabel::ArmVcpuSetTcb => "VCPU", _ => panic!("Internal error: unexpected label when getting object type '{:?}'", self.label) } } @@ -632,6 +651,7 @@ impl Invocation { InvocationLabel::ArmPageMap => "Map", InvocationLabel::CnodeMint => "Mint", InvocationLabel::SchedControlConfigureFlags => "ConfigureFlags", + InvocationLabel::ArmVcpuSetTcb => "VCPUSetTcb", _ => panic!("Internal error: unexpected label when getting method name '{:?}'", self.label) } } @@ -654,6 +674,7 @@ impl InvocationArgs { InvocationArgs::PageMap { .. } => InvocationLabel::ArmPageMap, InvocationArgs::CnodeMint { .. } => InvocationLabel::CnodeMint, InvocationArgs::SchedControlConfigureFlags { .. } => InvocationLabel::SchedControlConfigureFlags, + InvocationArgs::ArmVcpuSetTcb { .. } => InvocationLabel::ArmVcpuSetTcb, } } @@ -718,7 +739,8 @@ impl InvocationArgs { sched_control, vec![budget, period, extra_refills, badge, flags], vec![sched_context] - ) + ), + InvocationArgs::ArmVcpuSetTcb { vcpu, tcb } => (vcpu, vec![], vec![tcb]), } } } @@ -818,5 +840,9 @@ pub enum InvocationArgs { extra_refills: u64, badge: u64, flags: u64, + }, + ArmVcpuSetTcb { + vcpu: u64, + tcb: u64, } } diff --git a/tool/microkit/src/sysxml.rs b/tool/microkit/src/sysxml.rs index 60cdc135..107cadd7 100644 --- a/tool/microkit/src/sysxml.rs +++ b/tool/microkit/src/sysxml.rs @@ -34,8 +34,11 @@ use crate::MAX_PDS; /// This means we are left with 62 bits for the ID. /// IDs start at zero. const PD_MAX_ID: u64 = 61; +const VCPU_MAX_ID: u64 = PD_MAX_ID; const PD_MAX_PRIORITY: u8 = 254; +/// In microseconds +const BUDGET_DEFAULT: u64 = 1000; /// The purpose of this function is to parse an integer that could /// either be in decimal or hex format, unlike the normal parsing @@ -152,6 +155,7 @@ pub struct ProtectionDomain { pub maps: Vec, pub irqs: Vec, pub setvars: Vec, + pub virtual_machine: Option, /// Only used when parsing child PDs. All elements will be removed /// once we flatten each PD and its children into one list. pub child_pds: Vec, @@ -163,15 +167,31 @@ pub struct ProtectionDomain { text_pos: roxmltree::TextPos } +#[derive(Debug, PartialEq, Eq, Hash)] +pub struct VirtualMachine { + // Right now virtual machines are limited to a single vCPU + pub vcpu: VirtualCpu, + pub name: String, + pub maps: Vec, + pub priority: u8, + pub budget: u64, + pub period: u64, +} + +#[derive(Debug, PartialEq, Eq, Hash)] +pub struct VirtualCpu { + pub id: u64, +} + impl SysMapPerms { - fn from_str(s: &str) -> Result { + fn from_str(s: &str) -> Result { let mut perms = 0; for c in s.chars() { match c { 'r' => perms |= SysMapPerms::Read as u8, 'w' => perms |= SysMapPerms::Write as u8, 'x' => perms |= SysMapPerms::Execute as u8, - _ => return Err("invalid permissions") + _ => return Err(()), } } @@ -179,9 +199,54 @@ impl SysMapPerms { } } +impl SysMap { + fn from_xml(xml_sdf: &XmlSystemDescription, node: &roxmltree::Node, allow_setvar: bool) -> Result { + let mut attrs = vec!["mr", "vaddr", "perms", "cached"]; + if allow_setvar { + attrs.push("setvar_vaddr"); + } + check_attributes(xml_sdf, node, &attrs)?; + + let mr = checked_lookup(xml_sdf, node, "mr")?.to_string(); + let vaddr = sdf_parse_number(checked_lookup(xml_sdf, node, "vaddr")?, node)?; + let perms = if let Some(xml_perms) = node.attribute("perms") { + match SysMapPerms::from_str(xml_perms) { + Ok(parsed_perms) => parsed_perms, + Err(()) => return Err(value_error(xml_sdf, node, "perms must only be a combination of 'r', 'w', and 'x'".to_string())), + } + } else { + // Default to read-write + SysMapPerms::Read as u8 | SysMapPerms::Write as u8 + }; + + // On all architectures, the kernel does not allow write-only mappings + if perms == SysMapPerms::Write as u8 { + return Err(value_error(xml_sdf, node, "perms must not be 'w', write-only mappings are not allowed".to_string())); + } + + let cached = if let Some(xml_cached) = node.attribute("cached") { + match str_to_bool(xml_cached) { + Some(val) => val, + None => return Err(value_error(xml_sdf, node, "cached must be 'true' or 'false'".to_string())) + } + } else { + // Default to cached + true + }; + + Ok(SysMap { + mr, + vaddr, + perms, + cached, + text_pos: Some(xml_sdf.doc.text_pos_at(node.range().start)), + }) + } +} + impl ProtectionDomain { pub fn needs_ep(&self) -> bool { - self.pp || self.has_children + self.pp || self.has_children || self.virtual_machine.is_some() } fn from_xml(xml_sdf: &XmlSystemDescription, node: &roxmltree::Node, is_child: bool) -> Result { @@ -199,12 +264,11 @@ impl ProtectionDomain { None }; - // Default to 1000 microseconds as the budget, with the period defaulting - // to being the same as the budget as well. + // If we do not have an explicit budget the period is equal to the default budget. let budget = if let Some(xml_budget) = node.attribute("budget") { sdf_parse_number(xml_budget, node)? } else { - 1000 + BUDGET_DEFAULT }; let period = if let Some(xml_period) = node.attribute("period") { sdf_parse_number(xml_period, node)? @@ -216,13 +280,19 @@ impl ProtectionDomain { } let pp = if let Some(xml_pp) = node.attribute("pp") { - str_to_bool(xml_pp)? + match str_to_bool(xml_pp) { + Some(val) => val, + None => return Err(value_error(xml_sdf, node, "pp must be 'true' or 'false'".to_string())) + } } else { false }; let passive = if let Some(xml_passive) = node.attribute("passive") { - str_to_bool(xml_passive)? + match str_to_bool(xml_passive) { + Some(val) => val, + None => return Err(value_error(xml_sdf, node, "passive must be 'true' or 'false'".to_string())) + } } else { false }; @@ -233,6 +303,7 @@ impl ProtectionDomain { let mut child_pds = Vec::new(); let mut program_image = None; + let mut virtual_machine = None; // Default to minimum priority let priority = if let Some(xml_priority) = node.attribute("priority") { @@ -261,43 +332,17 @@ impl ProtectionDomain { program_image = Some(Path::new(program_image_path).to_path_buf()); }, "map" => { - check_attributes(xml_sdf, &child, &["mr", "vaddr", "perms", "cached", "setvar_vaddr"])?; - let mr = checked_lookup(xml_sdf, &child, "mr")?.to_string(); - let vaddr = sdf_parse_number(checked_lookup(xml_sdf, &child, "vaddr")?, &child)?; - let perms = if let Some(xml_perms) = child.attribute("perms") { - SysMapPerms::from_str(xml_perms)? - } else { - // Default to read-write - SysMapPerms::Read as u8 | SysMapPerms::Write as u8 - }; - - // On all architectures, the kernel does not allow write-only mappings - if perms == SysMapPerms::Write as u8 { - return Err(value_error(xml_sdf, &child, "perms must not be 'w', write-only mappings are not allowed".to_string())); - } - - let cached = if let Some(xml_cached) = child.attribute("cached") { - str_to_bool(xml_cached)? - } else { - // Default to cached - true - }; - - maps.push(SysMap { - mr, - vaddr, - perms, - cached, - text_pos: Some(xml_sdf.doc.text_pos_at(child.range().start)), - }); + let map = SysMap::from_xml(xml_sdf, &child, true)?; if let Some(setvar_vaddr) = child.attribute("setvar_vaddr") { setvars.push(SysSetVar { symbol: setvar_vaddr.to_string(), region_paddr: None, - vaddr: Some(vaddr), + vaddr: Some(map.vaddr), }); } + + maps.push(map); } "irq" => { check_attributes(xml_sdf, &child, &["irq", "id", "trigger"])?; @@ -339,6 +384,13 @@ impl ProtectionDomain { }) }, "protection_domain" => child_pds.push(ProtectionDomain::from_xml(xml_sdf, &child, true)?), + "virtual_machine" => { + if virtual_machine.is_some() { + return Err(value_error(xml_sdf, node, "virtual_machine must only be specified once".to_string())); + } + + virtual_machine = Some(VirtualMachine::from_xml(xml_sdf, &child)?); + } _ => { let pos = xml_sdf.doc.text_pos_at(child.range().start); return Err(format!("Invalid XML element '{}': {}", child.tag_name().name(), loc_string(xml_sdf, pos))); @@ -367,6 +419,7 @@ impl ProtectionDomain { irqs, setvars, child_pds, + virtual_machine, has_children, parent: None, text_pos: xml_sdf.doc.text_pos_at(node.range().start), @@ -374,6 +427,84 @@ impl ProtectionDomain { } } +impl VirtualMachine { + fn from_xml(xml_sdf: &XmlSystemDescription, node: &roxmltree::Node) -> Result { + check_attributes(xml_sdf, node, &["name", "budget", "period", "priority"])?; + + let name = checked_lookup(xml_sdf, node, "name")?.to_string(); + // If we do not have an explicit budget the period is equal to the default budget. + let budget = if let Some(xml_budget) = node.attribute("budget") { + sdf_parse_number(xml_budget, node)? + } else { + BUDGET_DEFAULT + }; + let period = if let Some(xml_period) = node.attribute("period") { + sdf_parse_number(xml_period, node)? + } else { + budget + }; + if budget > period { + return Err(value_error(xml_sdf, node, format!("budget ({}) must be less than, or equal to, period ({})", budget, period))); + } + + // Default to minimum priority + let priority = if let Some(xml_priority) = node.attribute("priority") { + sdf_parse_number(xml_priority, node)? + } else { + 0 + }; + + let mut vcpu = None; + let mut maps = Vec::new(); + for child in node.children() { + if !child.is_element() { + continue; + } + + let child_name = child.tag_name().name(); + match child_name { + "vcpu" => { + if vcpu.is_some() { + return Err(value_error(xml_sdf, node, "vcpu must only be specified once".to_string())); + } + + check_attributes(xml_sdf, &child, &["id"])?; + let id = checked_lookup(xml_sdf, &child, "id")?.parse::().unwrap(); + if id > VCPU_MAX_ID { + return Err(value_error(xml_sdf, &child, format!("id must be < {}", VCPU_MAX_ID + 1))); + } + vcpu = Some(VirtualCpu { id }); + } + "map" => { + // Virtual machines do not have program images and so we do not allow + // setvar_vaddr on SysMap + let map = SysMap::from_xml(xml_sdf, &child, false)?; + maps.push(map); + }, + _ => { + let pos = xml_sdf.doc.text_pos_at(node.range().start); + return Err(format!("Error: invalid XML element '{}': {}", child_name, loc_string(xml_sdf, pos))); + } + } + } + + if vcpu.is_none() { + return Err(format!("Error: missing 'vcpu' element on virtual_machine: '{}'", name)); + } + + Ok(VirtualMachine { + vcpu: vcpu.unwrap(), + name, + maps, + // This downcast is safe as we have checked that this is less than + // the maximum VM priority, which fits in a u8. + priority: priority as u8, + budget, + period, + }) + } +} + impl SysMemoryRegion { fn from_xml(xml_sdf: &XmlSystemDescription, node: &roxmltree::Node, plat_desc: &PlatformDescription) -> Result { check_attributes(xml_sdf, node, &["name", "size", "page_size", "phys_addr"])?; @@ -543,7 +674,14 @@ fn pd_tree_to_list(xml_sdf: &XmlSystemDescription, mut root_pd: ProtectionDomain if child_ids.contains(&child_id) { return Err(format!("Error: duplicate id: {} in protection domain: '{}' @ {}", child_id, root_pd.name, loc_string(xml_sdf, child_pd.text_pos))); } - child_ids.push(child_pd.id.unwrap()); + // Also check that the child ID does not clash with the virtual machine ID, if the PD has one + if let Some(vm) = &root_pd.virtual_machine { + if child_id == vm.vcpu.id { + return Err(format!("Error: duplicate id: {} clashes with virtual machine vcpu id in protection domain: '{}' @ {}", + child_id, root_pd.name, loc_string(xml_sdf, child_pd.text_pos))); + } + } + child_ids.push(child_id); } if parent { @@ -649,6 +787,19 @@ pub fn parse(filename: &str, xml: &str, plat_desc: &PlatformDescription) -> Resu } } + let mut vms = vec![]; + for pd in &pds { + match &pd.virtual_machine { + Some(vm) => { + if vms.contains(&vm) { + return Err(format!("Error: duplicate virtual machine name '{}'.", vm.name)); + } + vms.push(vm); + }, + None => {} + } + } + // Ensure no duplicate IRQs let mut all_irqs = Vec::new(); for pd in &pds { diff --git a/tool/microkit/src/util.rs b/tool/microkit/src/util.rs index 2eac7e10..5e45667e 100644 --- a/tool/microkit/src/util.rs +++ b/tool/microkit/src/util.rs @@ -5,6 +5,7 @@ // use crate::sel4::Object; +use serde_json; pub fn msb(x: u64) -> u64 { 64 - x.leading_zeros() as u64 - 1 @@ -14,11 +15,11 @@ pub fn lsb(x: u64) -> u64 { x.trailing_zeros() as u64 } -pub fn str_to_bool(s: &str) -> Result { +pub fn str_to_bool(s: &str) -> Option { match s { - "true" => Ok(true), - "false" => Ok(false), - _ => Err("invalid boolean value") + "true" => Some(true), + "false" => Some(false), + _ => None } } @@ -124,6 +125,30 @@ pub fn comma_sep_usize(n: usize) -> String { comma_sep_u64(n as u64) } +pub fn json_str_as_u64(json: &serde_json::Value, field: &'static str) -> Result { + match json.get(field) { + Some(value) => Ok( + value + .as_str() + .unwrap_or_else(|| panic!("JSON field '{}' is not a string", field)) + .parse::() + .unwrap_or_else(|_| panic!("JSON field '{}' could not be converted to u64", field)) + ), + None => Err(format!("JSON field '{}' does not exist", field)) + } +} + +pub fn json_str_as_bool(json: &serde_json::Value, field: &'static str) -> Result { + match json.get(field) { + Some(value) => Ok( + value + .as_bool() + .unwrap_or_else(|| panic!("JSON field '{}' could not be converted to bool", field)) + ), + None => Err(format!("JSON field '{}' does not exist", field)) + } +} + /// Convert a struct into raw bytes in order to be written to /// disk or some other format. #[allow(clippy::missing_safety_doc)] diff --git a/tool/microkit/tests/test.rs b/tool/microkit/tests/test.rs index 823d1e7c..20c03a0f 100644 --- a/tool/microkit/tests/test.rs +++ b/tool/microkit/tests/test.rs @@ -15,6 +15,8 @@ const DEFAULT_KERNEL_CONFIG: sel4::Config = sel4::Config { init_cnode_bits: 12, cap_address_bits: 64, fan_out_limit: 256, + hypervisor: true, + arm_pa_size_bits: 40, }; const DEFAULT_PLAT_DESC: sysxml::PlatformDescription = sysxml::PlatformDescription::new(&DEFAULT_KERNEL_CONFIG);