From 25056b293c911a8033bd38e2c0c694d37f11ee36 Mon Sep 17 00:00:00 2001 From: Dmytro Semenets Date: Wed, 31 May 2023 14:39:34 +0300 Subject: [PATCH 01/14] drivers: xen: add xen version hypercall Xen API contains hypercall, which allows domains to identify Xen version, that is currently used on the system. It can be used to check if current version is supported by Zephyr or to change behavior of the drivers or services. Signed-off-by: Dmytro Semenets Signed-off-by: Dmytro Firsov --- arch/arm64/core/xen/hypercall.S | 3 ++- drivers/xen/CMakeLists.txt | 3 ++- drivers/xen/version.c | 26 ++++++++++++++++++++++++ include/zephyr/arch/arm64/hypercall.h | 3 ++- include/zephyr/xen/public/version.h | 29 +++++++++++++++++++++++++++ include/zephyr/xen/version.h | 28 ++++++++++++++++++++++++++ 6 files changed, 89 insertions(+), 3 deletions(-) create mode 100644 drivers/xen/version.c create mode 100644 include/zephyr/xen/public/version.h create mode 100644 include/zephyr/xen/version.h diff --git a/arch/arm64/core/xen/hypercall.S b/arch/arm64/core/xen/hypercall.S index 063538bae9029..eeee7e3f32e2e 100644 --- a/arch/arm64/core/xen/hypercall.S +++ b/arch/arm64/core/xen/hypercall.S @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: Apache-2.0 */ /* - * Copyright (c) 2021-2023 EPAM Systems + * Copyright (c) 2021-2025 EPAM Systems */ #include @@ -23,6 +23,7 @@ HYPERCALL(sched_op); HYPERCALL(event_channel_op); HYPERCALL(hvm_op); HYPERCALL(memory_op); +HYPERCALL(xen_version); #ifdef CONFIG_XEN_DOM0 HYPERCALL(domctl); diff --git a/drivers/xen/CMakeLists.txt b/drivers/xen/CMakeLists.txt index 412be7d318d3d..2f0a060ece3ab 100644 --- a/drivers/xen/CMakeLists.txt +++ b/drivers/xen/CMakeLists.txt @@ -1,9 +1,10 @@ # SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2021-2023 EPAM Systems +# Copyright (c) 2021-2025 EPAM Systems zephyr_sources(hvm.c) zephyr_sources(events.c) zephyr_sources_ifdef(CONFIG_XEN_GRANT_TABLE gnttab.c) zephyr_sources(memory.c) +zephyr_sources(version.c) add_subdirectory_ifdef(CONFIG_XEN_DOM0 dom0) diff --git a/drivers/xen/version.c b/drivers/xen/version.c new file mode 100644 index 0000000000000..41fc7c1e97a2c --- /dev/null +++ b/drivers/xen/version.c @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2025 EPAM Systems + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include +#include +#include + +int xen_version(void) +{ + return HYPERVISOR_xen_version(XENVER_version, NULL); +} + +int xen_version_extraversion(char *extra, int len) +{ + if (!extra || len < XEN_EXTRAVERSION_LEN) { + return -EINVAL; + } + + memset(extra, 0, len); + return HYPERVISOR_xen_version(XENVER_extraversion, extra); +} diff --git a/include/zephyr/arch/arm64/hypercall.h b/include/zephyr/arch/arm64/hypercall.h index 023ea10803d06..dd02320d4b88e 100644 --- a/include/zephyr/arch/arm64/hypercall.h +++ b/include/zephyr/arch/arm64/hypercall.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: Apache-2.0 */ /* - * Copyright (c) 2021-2023 EPAM Systems + * Copyright (c) 2021-2025 EPAM Systems */ #ifndef ZEPHYR_INCLUDE_ARCH_ARM64_HYPERCALL_H_ @@ -13,6 +13,7 @@ int HYPERVISOR_event_channel_op(int op, void *param); int HYPERVISOR_hvm_op(int op, void *param); int HYPERVISOR_memory_op(int op, void *param); int HYPERVISOR_grant_table_op(int op, void *uop, unsigned int count); +int HYPERVISOR_xen_version(int op, void *param); #ifdef CONFIG_XEN_DOM0 int HYPERVISOR_domctl(void *param); diff --git a/include/zephyr/xen/public/version.h b/include/zephyr/xen/public/version.h new file mode 100644 index 0000000000000..08f2d3b69b0de --- /dev/null +++ 
b/include/zephyr/xen/public/version.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: MIT */ +/****************************************************************************** + * version.h + * + * Xen version, type, and compile information. + * + * Copyright (c) 2005, Nguyen Anh Quynh + * Copyright (c) 2005, Keir Fraser + * + * Copyright (c) 2025 EPAM Systems + */ + +#ifndef __XEN_PUBLIC_VERSION_H__ +#define __XEN_PUBLIC_VERSION_H__ + +#include "xen.h" + +/* NB. All ops return zero on success, except XENVER_{version,pagesize} + * XENVER_{version,pagesize,build_id} + */ + +/* arg == NULL; returns major:minor (16:16). */ +#define XENVER_version 0 + +/* arg == xen_extraversion_t. */ +#define XENVER_extraversion 1 +#define XEN_EXTRAVERSION_LEN 16 + +#endif /* __XEN_PUBLIC_VERSION_H__ */ diff --git a/include/zephyr/xen/version.h b/include/zephyr/xen/version.h new file mode 100644 index 0000000000000..c639910862451 --- /dev/null +++ b/include/zephyr/xen/version.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2025 EPAM Systems + * + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef __XEN_DOM0_VERSION_H__ +#define __XEN_DOM0_VERSION_H__ +#include +#include +#include + +/** + * Get Xen hypervisor version integer encoded information + * + * @return Xen version on success, negative errno on error + */ +int xen_version(void); + +/** + * Get Xen hypervisor extra version string + * + * @param extra - buffer to store the extra version string + * @param len - maximum length of the buffer + * @return 0 on success, negative errno on error + */ +int xen_version_extraversion(char *extra, int len); + +#endif /* __XEN_DOM0_VERSION_H__ */ From fff9abd54c9e607af3d53e71341fe51e2e2fd8bf Mon Sep 17 00:00:00 2001 From: Dmytro Semenets Date: Wed, 31 May 2023 14:43:54 +0300 Subject: [PATCH 02/14] drivers: xen: dom0: add Xen sysctl hypercall This hypercall can be used get some information about physical machine and running guests: - sysctl hypercall "xen_sysctl_getphysinfo" allows read information about physical machine: number CPUs, memory sizes, hardware capabilities, etc. - sysctl hypercall "xen_sysctl_getdomaininfolist" returns array of domain info structures that provide information about particular domain(s). 
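As an illustration only, the sketch below shows how the version and sysctl wrappers introduced by these two patches might be consumed from Dom0 code. dump_xen_state() and the MAX_DOMAINS buffer size are hypothetical and not part of the patches; the wrapper signatures and struct fields follow the headers added in this series.

/* Hypothetical Dom0 sketch using xen_version(), xen_version_extraversion(),
 * xen_sysctl_physinfo() and xen_sysctl_getdomaininfo() from this series.
 */
#include <zephyr/kernel.h>
#include <zephyr/xen/version.h>
#include <zephyr/xen/public/version.h>
#include <zephyr/xen/dom0/sysctl.h>

#define MAX_DOMAINS 8 /* illustrative buffer size, not mandated by the API */

static void dump_xen_state(void)
{
	char extra[XEN_EXTRAVERSION_LEN];
	struct xen_sysctl_physinfo phys;
	struct xen_domctl_getdomaininfo doms[MAX_DOMAINS];
	int ver;
	int ret;

	/* XENVER_version packs major:minor as 16:16. */
	ver = xen_version();
	if (ver >= 0 && xen_version_extraversion(extra, sizeof(extra)) == 0) {
		printk("Running on Xen %d.%d%s\n", ver >> 16, ver & 0xffff, extra);
	}

	if (xen_sysctl_physinfo(&phys) == 0) {
		printk("%u CPUs online, %llu free pages\n",
		       phys.nr_cpus, (unsigned long long)phys.free_pages);
	}

	/* Returns the number of entries filled in, starting from domain 0. */
	ret = xen_sysctl_getdomaininfo(doms, 0, MAX_DOMAINS);
	for (int i = 0; i < ret; i++) {
		printk("found domain %u\n", doms[i].domain);
	}
}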
Signed-off-by: Dmytro Semenets Signed-off-by: Mykyta Poturai Signed-off-by: Dmytro Firsov --- arch/arm64/core/xen/hypercall.S | 1 + drivers/xen/dom0/CMakeLists.txt | 3 +- drivers/xen/dom0/sysctl.c | 59 ++++++++++ include/zephyr/arch/arm64/hypercall.h | 1 + include/zephyr/xen/dom0/sysctl.h | 40 +++++++ include/zephyr/xen/public/sysctl.h | 148 ++++++++++++++++++++++++++ 6 files changed, 251 insertions(+), 1 deletion(-) create mode 100644 drivers/xen/dom0/sysctl.c create mode 100644 include/zephyr/xen/dom0/sysctl.h create mode 100644 include/zephyr/xen/public/sysctl.h diff --git a/arch/arm64/core/xen/hypercall.S b/arch/arm64/core/xen/hypercall.S index eeee7e3f32e2e..54e825765debc 100644 --- a/arch/arm64/core/xen/hypercall.S +++ b/arch/arm64/core/xen/hypercall.S @@ -27,4 +27,5 @@ HYPERCALL(xen_version); #ifdef CONFIG_XEN_DOM0 HYPERCALL(domctl); +HYPERCALL(sysctl); #endif diff --git a/drivers/xen/dom0/CMakeLists.txt b/drivers/xen/dom0/CMakeLists.txt index c722beb611939..e95401c3ecea5 100644 --- a/drivers/xen/dom0/CMakeLists.txt +++ b/drivers/xen/dom0/CMakeLists.txt @@ -1,4 +1,5 @@ # SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2023 EPAM Systems +# Copyright (c) 2023-2025 EPAM Systems zephyr_sources(domctl.c) +zephyr_sources(sysctl.c) diff --git a/drivers/xen/dom0/sysctl.c b/drivers/xen/dom0/sysctl.c new file mode 100644 index 0000000000000..5bccf9e5c321f --- /dev/null +++ b/drivers/xen/dom0/sysctl.c @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2025 EPAM Systems + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include +#include + +static int do_sysctl(xen_sysctl_t *sysctl) +{ + sysctl->interface_version = XEN_SYSCTL_INTERFACE_VERSION; + return HYPERVISOR_sysctl(sysctl); +} + +int xen_sysctl_physinfo(struct xen_sysctl_physinfo *info) +{ + int ret; + xen_sysctl_t sysctl = { + .cmd = XEN_SYSCTL_physinfo, + }; + + if (!info) { + return -EINVAL; + } + + ret = do_sysctl(&sysctl); + if (ret < 0) { + return ret; + } + *info = sysctl.u.physinfo; + + return ret; +} + +int xen_sysctl_getdomaininfo(struct xen_domctl_getdomaininfo *domaininfo, + uint16_t first, uint16_t num) +{ + int ret; + xen_sysctl_t sysctl = { + .cmd = XEN_SYSCTL_getdomaininfolist, + .u.getdomaininfolist.first_domain = first, + .u.getdomaininfolist.max_domains = num, + }; + + if (!domaininfo || !num) { + return -EINVAL; + } + set_xen_guest_handle(sysctl.u.getdomaininfolist.buffer, domaininfo); + + ret = do_sysctl(&sysctl); + if (ret < 0) { + return ret; + } + + return sysctl.u.getdomaininfolist.num_domains; +} diff --git a/include/zephyr/arch/arm64/hypercall.h b/include/zephyr/arch/arm64/hypercall.h index dd02320d4b88e..15ef3b33b2c88 100644 --- a/include/zephyr/arch/arm64/hypercall.h +++ b/include/zephyr/arch/arm64/hypercall.h @@ -17,6 +17,7 @@ int HYPERVISOR_xen_version(int op, void *param); #ifdef CONFIG_XEN_DOM0 int HYPERVISOR_domctl(void *param); +int HYPERVISOR_sysctl(void *param); #endif #endif /* ZEPHYR_INCLUDE_ARCH_ARM64_HYPERCALL_H_ */ diff --git a/include/zephyr/xen/dom0/sysctl.h b/include/zephyr/xen/dom0/sysctl.h new file mode 100644 index 0000000000000..0b070121905b5 --- /dev/null +++ b/include/zephyr/xen/dom0/sysctl.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2025 EPAM Systems + * + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * @file + * + * @brief Xen System Control Interface + */ + +#ifndef __XEN_DOM0_SYSCTL_H__ +#define __XEN_DOM0_SYSCTL_H__ +#include +#include +#include + +/** + * @brief Retrieves information about the host system. 
+ * + * @param[out] info A pointer to a `struct xen_sysctl_physinfo` structure where the + * retrieved information will be stored. + * @return 0 on success, or a negative error code on failure. + */ +int xen_sysctl_physinfo(struct xen_sysctl_physinfo *info); + +/** + * @brief Retrieves information about Xen domains. + * + * @param[out] domaininfo A pointer to the `xen_domctl_getdomaininfo` structure + * to store the retrieved domain information. + * @param first The first domain ID to retrieve information for. + * @param num The maximum number of domains to retrieve information for. + * @return 0 on success, or a negative error code on failure. + */ +int xen_sysctl_getdomaininfo(struct xen_domctl_getdomaininfo *domaininfo, + uint16_t first, uint16_t num); + +#endif /* __XEN_DOM0_SYSCTL_H__ */ diff --git a/include/zephyr/xen/public/sysctl.h b/include/zephyr/xen/public/sysctl.h new file mode 100644 index 0000000000000..eb07500b38060 --- /dev/null +++ b/include/zephyr/xen/public/sysctl.h @@ -0,0 +1,148 @@ +/* SPDX-License-Identifier: MIT */ +/****************************************************************************** + * sysctl.h + * + * System management operations. For use by node control stack. + * + * Copyright (c) 2002-2006, K Fraser + * Copyright (c) 2025 EPAM Systems + * + */ + +#ifndef __XEN_PUBLIC_SYSCTL_H__ +#define __XEN_PUBLIC_SYSCTL_H__ + +#if !defined(CONFIG_XEN_DOM0) +#error "sysctl operations are intended for use by node control tools only" +#endif + +#include +#include "xen.h" +#include "domctl.h" + +#define XEN_SYSCTL_INTERFACE_VERSION 0x00000015 + +/* + * Get physical information about the host machine + */ +/* XEN_SYSCTL_physinfo */ + /* The platform supports HVM guests. */ +#define _XEN_SYSCTL_PHYSCAP_hvm 0 +#define XEN_SYSCTL_PHYSCAP_hvm BIT(_XEN_SYSCTL_PHYSCAP_hvm) + /* The platform supports PV guests. */ +#define _XEN_SYSCTL_PHYSCAP_pv 1 +#define XEN_SYSCTL_PHYSCAP_pv BIT(_XEN_SYSCTL_PHYSCAP_pv) + /* The platform supports direct access to I/O devices with IOMMU. */ +#define _XEN_SYSCTL_PHYSCAP_directio 2 +#define XEN_SYSCTL_PHYSCAP_directio BIT(_XEN_SYSCTL_PHYSCAP_directio) +/* The platform supports Hardware Assisted Paging. */ +#define _XEN_SYSCTL_PHYSCAP_hap 3 +#define XEN_SYSCTL_PHYSCAP_hap BIT(_XEN_SYSCTL_PHYSCAP_hap) +/* The platform supports software paging. */ +#define _XEN_SYSCTL_PHYSCAP_shadow 4 +#define XEN_SYSCTL_PHYSCAP_shadow BIT(_XEN_SYSCTL_PHYSCAP_shadow) +/* The platform supports sharing of HAP page tables with the IOMMU. */ +#define _XEN_SYSCTL_PHYSCAP_iommu_hap_pt_share 5 +#define XEN_SYSCTL_PHYSCAP_iommu_hap_pt_share BIT(_XEN_SYSCTL_PHYSCAP_iommu_hap_pt_share) +#define XEN_SYSCTL_PHYSCAP_vmtrace BIT(6) +/* The platform supports vPMU. */ +#define XEN_SYSCTL_PHYSCAP_vpmu BIT(7) + +/* Xen supports the Grant v1 and/or v2 ABIs. */ +#define XEN_SYSCTL_PHYSCAP_gnttab_v1 BIT(8) +#define XEN_SYSCTL_PHYSCAP_gnttab_v2 BIT(9) + +/* Max XEN_SYSCTL_PHYSCAP_* constant. Used for ABI checking. */ +#define XEN_SYSCTL_PHYSCAP_MAX XEN_SYSCTL_PHYSCAP_gnttab_v2 + +struct xen_sysctl_physinfo { + uint32_t threads_per_core; + uint32_t cores_per_socket; + uint32_t nr_cpus; /* # CPUs currently online */ + uint32_t max_cpu_id; /* Largest possible CPU ID on this host */ + uint32_t nr_nodes; /* # nodes currently online */ + uint32_t max_node_id; /* Largest possible node ID on this host */ + uint32_t cpu_khz; + uint32_t capabilities;/* XEN_SYSCTL_PHYSCAP_??? */ + uint32_t arch_capabilities;/* XEN_SYSCTL_PHYSCAP_{X86,ARM,...}_??? 
*/ + uint32_t pad; + uint64_aligned_t total_pages; + uint64_aligned_t free_pages; + uint64_aligned_t scrub_pages; + uint64_aligned_t outstanding_pages; + uint64_aligned_t max_mfn; /* Largest possible MFN on this host */ + uint32_t hw_cap[8]; +}; + +/* XEN_SYSCTL_getdomaininfolist */ +struct xen_sysctl_getdomaininfolist { + /* IN variables. */ + domid_t first_domain; + uint32_t max_domains; + + XEN_GUEST_HANDLE_64(xen_domctl_getdomaininfo_t) buffer; + /* OUT variables. */ + uint32_t num_domains; +}; + +/* Get physical CPU information. */ +/* XEN_SYSCTL_getcpuinfo */ +struct xen_sysctl_cpuinfo { + uint64_aligned_t idletime; +}; + +typedef struct xen_sysctl_cpuinfo xen_sysctl_cpuinfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpuinfo_t); + +struct xen_sysctl_getcpuinfo { + /* IN variables. */ + uint32_t max_cpus; + + XEN_GUEST_HANDLE_64(xen_sysctl_cpuinfo_t) info; + /* OUT variables. */ + uint32_t nr_cpus; +}; + +struct xen_sysctl { + uint32_t cmd; +#define XEN_SYSCTL_readconsole 1 +#define XEN_SYSCTL_tbuf_op 2 +#define XEN_SYSCTL_physinfo 3 +#define XEN_SYSCTL_sched_id 4 +#define XEN_SYSCTL_perfc_op 5 +#define XEN_SYSCTL_getdomaininfolist 6 +#define XEN_SYSCTL_debug_keys 7 +#define XEN_SYSCTL_getcpuinfo 8 +#define XEN_SYSCTL_availheap 9 +#define XEN_SYSCTL_get_pmstat 10 +#define XEN_SYSCTL_cpu_hotplug 11 +#define XEN_SYSCTL_pm_op 12 +#define XEN_SYSCTL_page_offline_op 14 +#define XEN_SYSCTL_lockprof_op 15 +#define XEN_SYSCTL_cputopoinfo 16 +#define XEN_SYSCTL_numainfo 17 +#define XEN_SYSCTL_cpupool_op 18 +#define XEN_SYSCTL_scheduler_op 19 +#define XEN_SYSCTL_coverage_op 20 +#define XEN_SYSCTL_psr_cmt_op 21 +#define XEN_SYSCTL_pcitopoinfo 22 +#define XEN_SYSCTL_psr_alloc 23 +/* #define XEN_SYSCTL_tmem_op 24 */ +#define XEN_SYSCTL_get_cpu_levelling_caps 25 +#define XEN_SYSCTL_get_cpu_featureset 26 +#define XEN_SYSCTL_livepatch_op 27 +/* #define XEN_SYSCTL_set_parameter 28 */ +#define XEN_SYSCTL_get_cpu_policy 29 + uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */ + union { + struct xen_sysctl_physinfo physinfo; + struct xen_sysctl_getdomaininfolist getdomaininfolist; + struct xen_sysctl_getcpuinfo getcpuinfo; + uint8_t pad[128]; + } u; +}; + +typedef struct xen_sysctl xen_sysctl_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_t); + +#endif /* __XEN_PUBLIC_SYSCTL_H__ */ From a151a04c043acb40158c456fc4572ea28cf820c8 Mon Sep 17 00:00:00 2001 From: Dmytro Semenets Date: Wed, 31 May 2023 14:52:01 +0300 Subject: [PATCH 03/14] drivers: xen: dom0: add getvcpuinfo domctl call Add the XEN_DOMCTL_getvcpuinfo domain control call to allow Domain-0 to query information about a domain's virtual CPUs. This can be used by management tools and services for gathering statistics and monitoring the current status of vCPUs. 
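As a non-authoritative illustration, a monitoring helper built on the wrapper added below could look like this. report_vcpu() is a hypothetical caller; the xen_domctl_getvcpu() signature and the xen_domctl_getvcpuinfo fields match the ones introduced in this patch.

/* Hypothetical vCPU status dump using the new XEN_DOMCTL_getvcpuinfo wrapper. */
#include <zephyr/kernel.h>
#include <zephyr/xen/dom0/domctl.h>

static void report_vcpu(int domid, uint32_t vcpu)
{
	struct xen_domctl_getvcpuinfo info;
	int ret = xen_domctl_getvcpu(domid, vcpu, &info);

	if (ret < 0) {
		printk("getvcpuinfo(dom%d, vcpu%u) failed: %d\n", domid, vcpu, ret);
		return;
	}

	printk("dom%d vcpu%u: online=%u running=%u blocked=%u pcpu=%u cpu_time=%llu ns\n",
	       domid, vcpu, info.online, info.running, info.blocked,
	       info.cpu, (unsigned long long)info.cpu_time);
}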
Signed-off-by: Dmytro Semenets Signed-off-by: Dmytro Firsov --- drivers/xen/dom0/domctl.c | 21 +++++++++++++++++++++ include/zephyr/xen/dom0/domctl.h | 1 + include/zephyr/xen/public/domctl.h | 13 +++++++++++++ 3 files changed, 35 insertions(+) diff --git a/drivers/xen/dom0/domctl.c b/drivers/xen/dom0/domctl.c index 5d5178584ad80..99529b5f5fe96 100644 --- a/drivers/xen/dom0/domctl.c +++ b/drivers/xen/dom0/domctl.c @@ -304,3 +304,24 @@ int xen_domctl_cacheflush(int domid, struct xen_domctl_cacheflush *cacheflush) return do_domctl(&domctl); } + +int xen_domctl_getvcpu(int domid, uint32_t vcpu, struct xen_domctl_getvcpuinfo *info) +{ + int ret; + xen_domctl_t domctl = { + .cmd = XEN_DOMCTL_getvcpuinfo, + .domain = domid, + .u.getvcpuinfo.vcpu = vcpu, + }; + + if (!info) { + return -EINVAL; + } + + ret = do_domctl(&domctl); + if (!ret) { + *info = domctl.u.getvcpuinfo; + } + + return ret; +} diff --git a/include/zephyr/xen/dom0/domctl.h b/include/zephyr/xen/dom0/domctl.h index 1ff8334968851..59d39d8b9a96b 100644 --- a/include/zephyr/xen/dom0/domctl.h +++ b/include/zephyr/xen/dom0/domctl.h @@ -34,5 +34,6 @@ int xen_domctl_max_vcpus(int domid, int max_vcpus); int xen_domctl_createdomain(int domid, struct xen_domctl_createdomain *config); int xen_domctl_cacheflush(int domid, struct xen_domctl_cacheflush *cacheflush); int xen_domctl_destroydomain(int domid); +int xen_domctl_getvcpu(int domid, uint32_t vcpu, struct xen_domctl_getvcpuinfo *info); #endif /* __XEN_DOM0_DOMCTL_H__ */ diff --git a/include/zephyr/xen/public/domctl.h b/include/zephyr/xen/public/domctl.h index 9d856501a0983..baa6b137f5140 100644 --- a/include/zephyr/xen/public/domctl.h +++ b/include/zephyr/xen/public/domctl.h @@ -195,6 +195,18 @@ struct xen_domctl_vcpucontext { XEN_GUEST_HANDLE_64(vcpu_guest_context_t) ctxt; /* IN/OUT */ }; +/* XEN_DOMCTL_getvcpuinfo */ +struct xen_domctl_getvcpuinfo { + /* IN variables. */ + uint32_t vcpu; + /* OUT variables. */ + uint8_t online; /* currently online (not hotplugged)? */ + uint8_t blocked; /* blocked waiting for an event? */ + uint8_t running; /* currently scheduled on its CPU? */ + uint64_aligned_t cpu_time; /* total cpu time consumed (ns) */ + uint32_t cpu; /* current mapping */ +}; + /* * XEN_DOMCTL_max_vcpus: * @@ -499,6 +511,7 @@ struct xen_domctl { struct xen_domctl_getdomaininfo getdomaininfo; struct xen_domctl_max_mem max_mem; struct xen_domctl_vcpucontext vcpucontext; + struct xen_domctl_getvcpuinfo getvcpuinfo; struct xen_domctl_max_vcpus max_vcpus; struct xen_domctl_scheduler_op scheduler_op; struct xen_domctl_iomem_permission iomem_permission; From a5795203eabb12cb63d6cfd42d6aa0366a218d09 Mon Sep 17 00:00:00 2001 From: Mykyta Poturai Date: Mon, 29 Apr 2024 11:00:08 +0300 Subject: [PATCH 04/14] xen: domctl: fix function parameter name in set/get_paging_mempool_size The size is passed in bytes, not in megabytes. So rename the parameter to avoid confusion. 
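A minimal hypothetical caller, shown only to make the unit explicit; grow_p2m_pool() is not part of the patch, but the renamed parameter is passed in bytes exactly as described above.

/* Hypothetical caller: the paging mempool size is given in bytes. */
#include <stdint.h>
#include <zephyr/xen/dom0/domctl.h>

static int grow_p2m_pool(int domid)
{
	const uint64_t pool_bytes = 16ULL * 1024 * 1024; /* 16 MiB, expressed in bytes */

	return xen_domctl_set_paging_mempool_size(domid, pool_bytes);
}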
Signed-off-by: Mykyta Poturai Signed-off-by: Dmytro Firsov --- drivers/xen/dom0/domctl.c | 8 ++++---- include/zephyr/xen/dom0/domctl.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/xen/dom0/domctl.c b/drivers/xen/dom0/domctl.c index 99529b5f5fe96..0c6d184966298 100644 --- a/drivers/xen/dom0/domctl.c +++ b/drivers/xen/dom0/domctl.c @@ -105,7 +105,7 @@ int xen_domctl_getdomaininfo(int domid, xen_domctl_getdomaininfo_t *dom_info) return 0; } -int xen_domctl_get_paging_mempool_size(int domid, uint64_t *size_mb) +int xen_domctl_get_paging_mempool_size(int domid, uint64_t *size) { int rc; xen_domctl_t domctl = { @@ -118,17 +118,17 @@ int xen_domctl_get_paging_mempool_size(int domid, uint64_t *size_mb) return rc; } - *size_mb = domctl.u.paging_mempool.size; + *size = domctl.u.paging_mempool.size; return 0; } -int xen_domctl_set_paging_mempool_size(int domid, uint64_t size_mb) +int xen_domctl_set_paging_mempool_size(int domid, uint64_t size) { xen_domctl_t domctl = { .cmd = XEN_DOMCTL_set_paging_mempool_size, .domain = domid, - .u.paging_mempool.size = size_mb, + .u.paging_mempool.size = size, }; return do_domctl(&domctl); diff --git a/include/zephyr/xen/dom0/domctl.h b/include/zephyr/xen/dom0/domctl.h index 59d39d8b9a96b..6b2da69803151 100644 --- a/include/zephyr/xen/dom0/domctl.h +++ b/include/zephyr/xen/dom0/domctl.h @@ -19,8 +19,8 @@ int xen_domctl_resumedomain(int domid); int xen_domctl_getvcpucontext(int domid, int vcpu, vcpu_guest_context_t *ctxt); int xen_domctl_setvcpucontext(int domid, int vcpu, vcpu_guest_context_t *ctxt); int xen_domctl_getdomaininfo(int domid, xen_domctl_getdomaininfo_t *dom_info); -int xen_domctl_get_paging_mempool_size(int domid, uint64_t *size_mb); -int xen_domctl_set_paging_mempool_size(int domid, uint64_t size_mb); +int xen_domctl_get_paging_mempool_size(int domid, uint64_t *size); +int xen_domctl_set_paging_mempool_size(int domid, uint64_t size); int xen_domctl_max_mem(int domid, uint64_t max_memkb); int xen_domctl_set_address_size(int domid, int addr_size); int xen_domctl_iomem_permission(int domid, uint64_t first_mfn, From 4a66a7d5a99f408cc8227eacbfcc1f14f1cac879 Mon Sep 17 00:00:00 2001 From: Mykyta Poturai Date: Mon, 29 Apr 2024 11:56:54 +0300 Subject: [PATCH 05/14] xen: domctl: add doxygen comments for domctl functions Document all of the public functions in the domctl API with doxygen Signed-off-by: Mykyta Poturai Signed-off-by: Dmytro Firsov --- include/zephyr/xen/dom0/domctl.h | 196 +++++++++++++++++++++++++++++++ 1 file changed, 196 insertions(+) diff --git a/include/zephyr/xen/dom0/domctl.h b/include/zephyr/xen/dom0/domctl.h index 6b2da69803151..3b947390676bb 100644 --- a/include/zephyr/xen/dom0/domctl.h +++ b/include/zephyr/xen/dom0/domctl.h @@ -3,6 +3,13 @@ * Copyright (c) 2023 EPAM Systems * */ + +/** + * @file + * + * @brief Xen Domain Control Interface + */ + #ifndef __XEN_DOM0_DOMCTL_H__ #define __XEN_DOM0_DOMCTL_H__ @@ -12,28 +19,217 @@ #include +/** + * @brief Perform a scheduler operation on a specified domain. + * + * @param domid The ID of the domain on which the scheduler operation is to be performed. + * @param[in,out] sched_op A pointer to a `struct xen_domctl_scheduler_op` object that defines + * the specific scheduler operation to be performed. + * @return Returns 0 on success, or a negative error code on failure. + */ int xen_domctl_scheduler_op(int domid, struct xen_domctl_scheduler_op *sched_op); + +/** + * @brief Pauses a domain in the Xen hypervisor. 
+ * + * @param domid The ID of the domain to be paused. + * @return Returns 0 on success, or a negative error code on failure. + */ int xen_domctl_pausedomain(int domid); + +/** + * @brief Unpauses a domain in the Xen hypervisor. + * + * @param domid The domain ID of the domain to be unpaused. + * @return 0 on success, or a negative error code on failure. + */ int xen_domctl_unpausedomain(int domid); + +/** + * @brief Resumes a domain. + * + * This function resumes the execution of a domain in the shutdown state. + * + * @param domid The ID of the domain to be resumed. + * @return 0 on success, or a negative error code on failure. + */ int xen_domctl_resumedomain(int domid); + +/** + * @brief Retrieves the virtual CPU context for a specific domain and virtual CPU. + * This function resumes the execution of a domain in the shutdown state. + * + * @param domid The ID of the domain. + * @param vcpu The ID of the virtual CPU. + * @param[out] ctxt Pointer to the `vcpu_guest_context_t` structure where the + * virtual CPU context will be stored. + * @return 0 on success, or a negative error code on failure. + */ int xen_domctl_getvcpucontext(int domid, int vcpu, vcpu_guest_context_t *ctxt); + +/** + * @brief Sets the virtual CPU context for a specified domain and virtual CPU. + * + * @param domid The ID of the domain. + * @param vcpu The ID of the virtual CPU. + * @param ctxt Pointer to the virtual CPU guest context structure. + * @return 0 on success, or a negative error code on failure. + */ int xen_domctl_setvcpucontext(int domid, int vcpu, vcpu_guest_context_t *ctxt); + +/** + * @brief Retrieves information about a Xen domain. + * + * @param domid The ID of the Xen domain to retrieve information for. + * @param[out] dom_info Pointer to the structure where the retrieved + * domain information will be stored. + * @return 0 on success, or a negative error code on failure. + */ int xen_domctl_getdomaininfo(int domid, xen_domctl_getdomaininfo_t *dom_info); + +/** + * @brief Gets the paging mempool size for a specified domain. + * + * @param domid The ID of the domain. + * @param size pointer where to store size of the paging mempool. + * @return 0 on success, or a negative error code on failure. + */ int xen_domctl_get_paging_mempool_size(int domid, uint64_t *size); + +/** + * @brief Sets the paging mempool size for a specified domain. + * + * @param domid The ID of the domain. + * @param size The size of the paging mempool in bytes. + * @return 0 on success, or a negative error code on failure. + */ int xen_domctl_set_paging_mempool_size(int domid, uint64_t size); + +/** + * @brief Sets the maximum memory for a specified domain. + * + * @param domid The domain ID of the domain to set the maximum memory for. + * @param max_memkb The maximum memory (in kilobytes) to set for the domain. + * @return 0 on success, or a negative error code on failure. + */ int xen_domctl_max_mem(int domid, uint64_t max_memkb); + +/** + * @brief Sets the address size for a specified domain. + * + * @param domid The ID of the domain. + * @param addr_size The address size to be set. + * @return 0 on success, negative error code on failure. + */ int xen_domctl_set_address_size(int domid, int addr_size); + +/** + * @brief Set IOMEM permission for a domain. + * + * @param domid The ID of the domain for which IOMEM permission is being set. + * @param first_mfn The starting machine frame number of the memory range. + * @param nr_mfns The number of MFNs in the memory range. 
+ * @param allow_access Flag indicating whether to allow or deny access to the + * specified memory range. A non-zero value allows access, + * while a zero value denies access. + * + * @return 0 on success, or a negative error code on failure. + */ int xen_domctl_iomem_permission(int domid, uint64_t first_mfn, uint64_t nr_mfns, uint8_t allow_access); + +/** + * @brief Maps a range of machine memory to a range of guest memory. + * + * @param domid The domain ID of the target domain. + * @param first_gfn The first guest frame number to map. + * @param first_mfn The first machine frame number to map. + * @param nr_mfns The number of machine frames to map. + * @param add_mapping Flag indicating whether to add or remove the mapping. + * + * @return 0 on success, or a negative error code on failure. + */ int xen_domctl_memory_mapping(int domid, uint64_t first_gfn, uint64_t first_mfn, uint64_t nr_mfns, uint32_t add_mapping); + +/** + * @brief Assign a device to a guest. Sets up IOMMU structures. + * + * @param domid The ID of the domain to which the device is to be assigned. + * @param dtdev_path The path of the device tree device to be assigned. + * @return 0 on success, or a negative error code on failure. + */ int xen_domctl_assign_dt_device(int domid, char *dtdev_path); + +/** + * @brief Binds a physical IRQ to a specified domain. + * + * Only supports SPI IRQs for now. + * + * @param domid The ID of the domain to bind the IRQ to. + * @param machine_irq The machine IRQ number to bind. + * @param irq_type The type of IRQ to bind (PT_IRQ_TYPE_SPI). + * @param bus The PCI bus number of the device generating the IRQ. (optional) + * @param device The PCI device number generating the IRQ. (optional) + * @param intx The PCI INTx line number of the device generating the IRQ. (optional) + * @param isa_irq The ISA IRQ number to bind. (optional) + * @param spi The shared peripheral interrupt (SPI) number to bind. (optional) + * @return 0 on success, or a negative error code on failure. + */ int xen_domctl_bind_pt_irq(int domid, uint32_t machine_irq, uint8_t irq_type, uint8_t bus, uint8_t device, uint8_t intx, uint8_t isa_irq, uint16_t spi); + +/** + * @brief Set the maximum number of vCPUs for a domain. + * + * The parameter passed to XEN_DOMCTL_max_vcpus must match the value passed to + * XEN_DOMCTL_createdomain. This hypercall is in the process of being removed + * (once the failure paths in domain_create() have been improved), but is + * still required in the short term to allocate the vcpus themselves. + * + * @param domid The ID of the domain. + * @param max_vcpus Maximum number of vCPUs to set. + * @return 0 on success, or a negative error code on failure. + */ int xen_domctl_max_vcpus(int domid, int max_vcpus); + +/** + * @brief Creates a new domain with the specified domain ID and configuration. + * + * @param domid The domain ID of the new domain. + * @param config Pointer to a structure containing the configuration for the new domain. + * @return 0 on success, or a negative error code on failure. + */ int xen_domctl_createdomain(int domid, struct xen_domctl_createdomain *config); + +/** + * @brief Clean and invalidate caches associated with given region of + * guest memory. + * + * @param domid The ID of the domain for which the cache needs to be flushed. + * @param cacheflush A pointer to the `xen_domctl_cacheflush` structure that + * contains the cache flush parameters. + * @return Returns an integer value indicating the success or failure of the + * cache flush operation. 
+ */ int xen_domctl_cacheflush(int domid, struct xen_domctl_cacheflush *cacheflush); + +/** + * @brief Destroys a Xen domain. + * + * @param domid The ID of the domain to be destroyed. + * @return 0 on success, or a negative error code on failure. + */ int xen_domctl_destroydomain(int domid); + +/** + * @brief Retrieves information about a specific virtual CPU (vCPU) in a Xen domain. + * + * @param domid The ID of the domain. + * @param vcpu The index of the vCPU. + * @param[out] info Pointer to a structure to store the vCPU information. + * @return 0 on success, or a negative error code on failure. + */ int xen_domctl_getvcpu(int domid, uint32_t vcpu, struct xen_domctl_getvcpuinfo *info); #endif /* __XEN_DOM0_DOMCTL_H__ */ From 1e74e9885b49bc34478122d6304f22eda1a77470 Mon Sep 17 00:00:00 2001 From: Mykyta Poturai Date: Wed, 24 Apr 2024 15:07:59 +0300 Subject: [PATCH 06/14] xen: domctl: Get back created domain id If 0 is passed as domain id to the Xen createdomain hypercall, it will allocate a new domain id and return it via the domctl structure. Allow callers to access this new domain id via a pointer arg. This will allow to create domains without explicitly specifying the domain id for them. Signed-off-by: Mykyta Poturai Signed-off-by: Dmytro Firsov --- drivers/xen/dom0/domctl.c | 22 +++++++++++++++------- include/zephyr/xen/dom0/domctl.h | 8 ++++++-- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/xen/dom0/domctl.c b/drivers/xen/dom0/domctl.c index 0c6d184966298..ea7b2448cc585 100644 --- a/drivers/xen/dom0/domctl.c +++ b/drivers/xen/dom0/domctl.c @@ -273,15 +273,23 @@ int xen_domctl_max_vcpus(int domid, int max_vcpus) return do_domctl(&domctl); } -int xen_domctl_createdomain(int domid, struct xen_domctl_createdomain *config) +int xen_domctl_createdomain(int *domid, struct xen_domctl_createdomain *config) { - xen_domctl_t domctl = { - .cmd = XEN_DOMCTL_createdomain, - .domain = domid, - .u.createdomain = *config, - }; + int ret; + xen_domctl_t domctl; - return do_domctl(&domctl); + if (!domid || !config) { + return -EINVAL; + } + + domctl.cmd = XEN_DOMCTL_createdomain, + domctl.domain = *domid, + domctl.u.createdomain = *config, + + ret = do_domctl(&domctl); + *domid = domctl.domain; + + return ret; } int xen_domctl_destroydomain(int domid) diff --git a/include/zephyr/xen/dom0/domctl.h b/include/zephyr/xen/dom0/domctl.h index 3b947390676bb..1c518ffe3125b 100644 --- a/include/zephyr/xen/dom0/domctl.h +++ b/include/zephyr/xen/dom0/domctl.h @@ -196,11 +196,15 @@ int xen_domctl_max_vcpus(int domid, int max_vcpus); /** * @brief Creates a new domain with the specified domain ID and configuration. * - * @param domid The domain ID of the new domain. + * NB. domid is an IN/OUT parameter for this operation. + * If it is specified as an invalid value (0 or >= DOMID_FIRST_RESERVED), + * an id is auto-allocated and returned. + + * @param[in,out] domid Pointer to domain ID of the new domain. * @param config Pointer to a structure containing the configuration for the new domain. * @return 0 on success, or a negative error code on failure. 
*/ -int xen_domctl_createdomain(int domid, struct xen_domctl_createdomain *config); +int xen_domctl_createdomain(int *domid, struct xen_domctl_createdomain *config); /** * @brief Clean and invalidate caches associated with given region of From 89ae6a2b750b20d2b9866a757b1b2f0eb8131909 Mon Sep 17 00:00:00 2001 From: Mykyta Poturai Date: Fri, 14 Jun 2024 12:33:42 +0300 Subject: [PATCH 07/14] xen: Add support for changing Xen Domctl interface version Add a new Kconfig option CONFIG_XEN_DOMCTL_INTERFACE_VERSION that allows to change the version of the Domctl interface used by Zephyr to issue domctl hypercalls. Add compile-time checks to enable or disable certain Domctl operations based on the selected Domctl interface version. For now versions 0x15, 0x16, and 0x17 are supported. Also it required to correctly guard domctl call that were not supported prior to specified version. Signed-off-by: Mykyta Poturai Signed-off-by: Dmytro Firsov --- arch/arm64/core/xen/Kconfig | 11 +++++++++++ drivers/xen/dom0/domctl.c | 4 +++- include/zephyr/xen/public/domctl.h | 8 ++++++-- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/arch/arm64/core/xen/Kconfig b/arch/arm64/core/xen/Kconfig index f860f9f1d4faa..40e2e321b07b7 100644 --- a/arch/arm64/core/xen/Kconfig +++ b/arch/arm64/core/xen/Kconfig @@ -33,3 +33,14 @@ config XEN_INTERFACE_VERSION help Xen interface version to use. This is the version of the interface that Zephyr will use to communicate with the hypervisor. + +config XEN_DOMCTL_INTERFACE_VERSION + hex "Xen Domctl interface version" + default 0x17 + range 0x15 0x17 + depends on XEN + help + Xen Domctl interface version to use. This is the version of the + domctl interface that Zephyr will use to communicate with + the hypervisor. The default value is the latest version supported + by the kernel. diff --git a/drivers/xen/dom0/domctl.c b/drivers/xen/dom0/domctl.c index ea7b2448cc585..10c8e0813b6b7 100644 --- a/drivers/xen/dom0/domctl.c +++ b/drivers/xen/dom0/domctl.c @@ -16,7 +16,7 @@ static int do_domctl(xen_domctl_t *domctl) { - domctl->interface_version = XEN_DOMCTL_INTERFACE_VERSION; + domctl->interface_version = CONFIG_XEN_DOMCTL_INTERFACE_VERSION; return HYPERVISOR_domctl(domctl); } @@ -105,6 +105,7 @@ int xen_domctl_getdomaininfo(int domid, xen_domctl_getdomaininfo_t *dom_info) return 0; } +#if CONFIG_XEN_DOMCTL_INTERFACE_VERSION >= 0x00000016 int xen_domctl_get_paging_mempool_size(int domid, uint64_t *size) { int rc; @@ -133,6 +134,7 @@ int xen_domctl_set_paging_mempool_size(int domid, uint64_t size) return do_domctl(&domctl); } +#endif int xen_domctl_max_mem(int domid, uint64_t max_memkb) { diff --git a/include/zephyr/xen/public/domctl.h b/include/zephyr/xen/public/domctl.h index baa6b137f5140..cb5c0924032e8 100644 --- a/include/zephyr/xen/public/domctl.h +++ b/include/zephyr/xen/public/domctl.h @@ -20,8 +20,6 @@ #include "grant_table.h" #include "memory.h" -#define XEN_DOMCTL_INTERFACE_VERSION 0x00000015 - /* * NB. xen_domctl.domain is an IN/OUT parameter for this operation. * If it is specified as an invalid value (0 or >= DOMID_FIRST_RESERVED), @@ -411,6 +409,7 @@ struct xen_domctl_cacheflush { xen_pfn_t start_pfn, nr_pfns; }; +#if CONFIG_XEN_DOMCTL_INTERFACE_VERSION >= 0x00000016 /* * XEN_DOMCTL_get_paging_mempool_size / XEN_DOMCTL_set_paging_mempool_size. * @@ -426,6 +425,7 @@ struct xen_domctl_cacheflush { struct xen_domctl_paging_mempool { uint64_aligned_t size; /* Size in bytes. 
*/ }; +#endif struct xen_domctl { uint32_t cmd; @@ -497,8 +497,10 @@ struct xen_domctl { #define XEN_DOMCTL_get_cpu_policy 82 #define XEN_DOMCTL_set_cpu_policy 83 #define XEN_DOMCTL_vmtrace_op 84 +#if CONFIG_XEN_DOMCTL_INTERFACE_VERSION >= 0x00000016 #define XEN_DOMCTL_get_paging_mempool_size 85 #define XEN_DOMCTL_set_paging_mempool_size 86 +#endif #define XEN_DOMCTL_gdbsx_guestmemio 1000 #define XEN_DOMCTL_gdbsx_pausevcpu 1001 #define XEN_DOMCTL_gdbsx_unpausevcpu 1002 @@ -520,7 +522,9 @@ struct xen_domctl { struct xen_domctl_bind_pt_irq bind_pt_irq; struct xen_domctl_memory_mapping memory_mapping; struct xen_domctl_cacheflush cacheflush; +#if CONFIG_XEN_DOMCTL_INTERFACE_VERSION >= 0x00000016 struct xen_domctl_paging_mempool paging_mempool; +#endif uint8_t pad[128]; } u; }; From 962319cb001583b6fde531abd8d733b629c83586 Mon Sep 17 00:00:00 2001 From: Mykyta Poturai Date: Fri, 14 Jun 2024 12:38:39 +0300 Subject: [PATCH 08/14] xen: Add support for changing Xen Sysctl interface version Add a new Kconfig option CONFIG_XEN_SYSCTL_INTERFACE_VERSION that allows to change the version of the Sysctl interface used by Zephyr to issue sysctl hypercalls. For now versions 0x15 is supported. Signed-off-by: Mykyta Poturai Signed-off-by: Dmytro Firsov --- arch/arm64/core/xen/Kconfig | 11 +++++++++++ drivers/xen/dom0/sysctl.c | 2 +- include/zephyr/xen/public/sysctl.h | 2 -- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/arm64/core/xen/Kconfig b/arch/arm64/core/xen/Kconfig index 40e2e321b07b7..768c17a70859a 100644 --- a/arch/arm64/core/xen/Kconfig +++ b/arch/arm64/core/xen/Kconfig @@ -44,3 +44,14 @@ config XEN_DOMCTL_INTERFACE_VERSION domctl interface that Zephyr will use to communicate with the hypervisor. The default value is the latest version supported by the kernel. + +config XEN_SYSCTL_INTERFACE_VERSION + hex "Xen Sysctl interface version" + default 0x15 + range 0x15 0x15 + depends on XEN + help + Xen Sysctl interface version to use. This is the version of the + domctl interface that Zephyr will use to communicate with + the hypervisor. The default value is the latest version supported + by the kernel. diff --git a/drivers/xen/dom0/sysctl.c b/drivers/xen/dom0/sysctl.c index 5bccf9e5c321f..2ff6af3ef3a81 100644 --- a/drivers/xen/dom0/sysctl.c +++ b/drivers/xen/dom0/sysctl.c @@ -11,7 +11,7 @@ static int do_sysctl(xen_sysctl_t *sysctl) { - sysctl->interface_version = XEN_SYSCTL_INTERFACE_VERSION; + sysctl->interface_version = CONFIG_XEN_SYSCTL_INTERFACE_VERSION; return HYPERVISOR_sysctl(sysctl); } diff --git a/include/zephyr/xen/public/sysctl.h b/include/zephyr/xen/public/sysctl.h index eb07500b38060..51865a6225705 100644 --- a/include/zephyr/xen/public/sysctl.h +++ b/include/zephyr/xen/public/sysctl.h @@ -20,8 +20,6 @@ #include "xen.h" #include "domctl.h" -#define XEN_SYSCTL_INTERFACE_VERSION 0x00000015 - /* * Get physical information about the host machine */ From b16cc64c554dfc6f5bcec710a8a6a40843448aae Mon Sep 17 00:00:00 2001 From: TOKITA Hiroshi Date: Sun, 18 May 2025 09:22:41 +0900 Subject: [PATCH 09/14] drivers: xen: add DMOP hypercall wrappers Add wrappers for following XEN_DMOP_* hypercalls. These enables Xen device model control path: dm_op provides operations to create/manage the ioreq server so guest MMIO accesses are trapped and handled by the hypervisor. These are guarded by CONFIG_XEN_DMOP. 
- dmop - dmop_create_ioreq_server XEN_DMOP_create_ioreq_server - dmop_map_io_range_to_ioreq_server XEN_DMOP_map_io_range_to_ioreq_server - dmop_set_ioreq_server_state XEN_DMOP_set_ioreq_server_state - dmop_nr_vcpus XEN_DMOP_nr_vcpus - dmop_set_irq_level: XEN_DMOP_set_irq_level Signed-off-by: TOKITA Hiroshi --- arch/arm64/core/xen/hypercall.S | 1 + drivers/xen/CMakeLists.txt | 1 + drivers/xen/Kconfig | 6 + drivers/xen/dmop.c | 157 ++++++++ include/zephyr/arch/arm64/hypercall.h | 3 + include/zephyr/xen/dmop.h | 114 ++++++ include/zephyr/xen/public/hvm/dm_op.h | 503 +++++++++++++++++++++++++ include/zephyr/xen/public/hvm/hvm_op.h | 13 + include/zephyr/xen/public/hvm/ioreq.h | 125 ++++++ 9 files changed, 923 insertions(+) create mode 100644 drivers/xen/dmop.c create mode 100644 include/zephyr/xen/dmop.h create mode 100644 include/zephyr/xen/public/hvm/dm_op.h create mode 100644 include/zephyr/xen/public/hvm/ioreq.h diff --git a/arch/arm64/core/xen/hypercall.S b/arch/arm64/core/xen/hypercall.S index 54e825765debc..02683337377c9 100644 --- a/arch/arm64/core/xen/hypercall.S +++ b/arch/arm64/core/xen/hypercall.S @@ -23,6 +23,7 @@ HYPERCALL(sched_op); HYPERCALL(event_channel_op); HYPERCALL(hvm_op); HYPERCALL(memory_op); +HYPERCALL(dm_op); HYPERCALL(xen_version); #ifdef CONFIG_XEN_DOM0 diff --git a/drivers/xen/CMakeLists.txt b/drivers/xen/CMakeLists.txt index 2f0a060ece3ab..ecdaf526c37eb 100644 --- a/drivers/xen/CMakeLists.txt +++ b/drivers/xen/CMakeLists.txt @@ -5,6 +5,7 @@ zephyr_sources(hvm.c) zephyr_sources(events.c) zephyr_sources_ifdef(CONFIG_XEN_GRANT_TABLE gnttab.c) zephyr_sources(memory.c) +zephyr_sources_ifdef(CONFIG_XEN_DMOP dmop.c) zephyr_sources(version.c) add_subdirectory_ifdef(CONFIG_XEN_DOM0 dom0) diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 99ad113887b12..578f28775b58a 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -5,6 +5,12 @@ if XEN menu "Xen drivers" +config XEN_DMOP + bool "Xen dmop hypercall wrappers" + help + Enable lightweight wrappers for Xen dm_op hypercalls used by + Xen device backends. Disable to drop dmop.c from the build. 
+ config XEN_GRANT_TABLE bool "Xen grant table driver" depends on HEAP_MEM_POOL_SIZE > 0 diff --git a/drivers/xen/dmop.c b/drivers/xen/dmop.c new file mode 100644 index 0000000000000..92ebf99ae6e5e --- /dev/null +++ b/drivers/xen/dmop.c @@ -0,0 +1,157 @@ +/* + * Copyright 2025 TOKITA Hiroshi + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include + +int dmop_create_ioreq_server(domid_t domid, uint8_t handle_bufioreq, ioservid_t *id) +{ + struct xen_dm_op_buf bufs[1] = {0}; + struct xen_dm_op dm_op = {0}; + int err; + + dm_op.op = XEN_DMOP_create_ioreq_server; + dm_op.u.create_ioreq_server.handle_bufioreq = handle_bufioreq; + + set_xen_guest_handle(bufs[0].h, &dm_op); + bufs[0].size = sizeof(struct xen_dm_op); + + err = HYPERVISOR_dm_op(domid, ARRAY_SIZE(bufs), bufs); + if (err) { + return err; + } + + *id = dm_op.u.create_ioreq_server.id; + + return 0; +} + +int dmop_destroy_ioreq_server(domid_t domid, ioservid_t id) +{ + struct xen_dm_op_buf bufs[1] = {0}; + struct xen_dm_op dm_op = {0}; + int err; + + dm_op.op = XEN_DMOP_destroy_ioreq_server; + dm_op.u.destroy_ioreq_server.id = id; + + set_xen_guest_handle(bufs[0].h, &dm_op); + bufs[0].size = sizeof(struct xen_dm_op); + + err = HYPERVISOR_dm_op(domid, ARRAY_SIZE(bufs), bufs); + if (err) { + return err; + } + + return 0; +} + +int dmop_map_io_range_to_ioreq_server(domid_t domid, ioservid_t id, uint32_t type, uint64_t start, + uint64_t end) +{ + struct xen_dm_op_buf bufs[1] = {0}; + struct xen_dm_op dm_op = {0}; + int err; + + dm_op.op = XEN_DMOP_map_io_range_to_ioreq_server; + dm_op.u.map_io_range_to_ioreq_server.id = id; + dm_op.u.map_io_range_to_ioreq_server.type = type; + dm_op.u.map_io_range_to_ioreq_server.start = start; + dm_op.u.map_io_range_to_ioreq_server.end = end; + + set_xen_guest_handle(bufs[0].h, &dm_op); + bufs[0].size = sizeof(struct xen_dm_op); + + err = HYPERVISOR_dm_op(domid, ARRAY_SIZE(bufs), bufs); + if (err < 0) { + return err; + } + + return 0; +} + +int dmop_unmap_io_range_from_ioreq_server(domid_t domid, ioservid_t id, uint32_t type, + uint64_t start, uint64_t end) +{ + struct xen_dm_op_buf bufs[1] = {0}; + struct xen_dm_op dm_op = {0}; + int err; + + dm_op.op = XEN_DMOP_unmap_io_range_from_ioreq_server; + dm_op.u.unmap_io_range_from_ioreq_server.id = id; + dm_op.u.unmap_io_range_from_ioreq_server.type = type; + dm_op.u.unmap_io_range_from_ioreq_server.start = start; + dm_op.u.unmap_io_range_from_ioreq_server.end = end; + + set_xen_guest_handle(bufs[0].h, &dm_op); + bufs[0].size = sizeof(struct xen_dm_op); + + err = HYPERVISOR_dm_op(domid, ARRAY_SIZE(bufs), bufs); + if (err < 0) { + return err; + } + + return 0; +} + +int dmop_set_ioreq_server_state(domid_t domid, ioservid_t id, uint8_t enabled) +{ + struct xen_dm_op_buf bufs[1] = {0}; + struct xen_dm_op dm_op = {0}; + int err; + + dm_op.op = XEN_DMOP_set_ioreq_server_state; + dm_op.u.set_ioreq_server_state.id = id; + dm_op.u.set_ioreq_server_state.enabled = enabled; + + set_xen_guest_handle(bufs[0].h, &dm_op); + bufs[0].size = sizeof(struct xen_dm_op); + + err = HYPERVISOR_dm_op(domid, 1, bufs); + if (err) { + return err; + } + + return 0; +} + +int dmop_nr_vcpus(domid_t domid) +{ + struct xen_dm_op_buf bufs[1] = {0}; + struct xen_dm_op dm_op = {0}; + int err; + + dm_op.op = XEN_DMOP_nr_vcpus; + + set_xen_guest_handle(bufs[0].h, &dm_op); + bufs[0].size = sizeof(struct xen_dm_op); + + err = HYPERVISOR_dm_op(domid, 1, bufs); + if (err < 0) { + return err; + } + + return dm_op.u.nr_vcpus.vcpus; +} + +int dmop_set_irq_level(domid_t domid, 
uint32_t irq, uint8_t level) +{ + struct xen_dm_op_buf bufs[1] = {0}; + struct xen_dm_op dm_op = {0}; + int err; + + dm_op.op = XEN_DMOP_set_irq_level; + dm_op.u.set_irq_level.irq = irq; + dm_op.u.set_irq_level.level = level; + + set_xen_guest_handle(bufs[0].h, &dm_op); + bufs[0].size = sizeof(struct xen_dm_op); + + err = HYPERVISOR_dm_op(domid, 1, bufs); + + return err; +} diff --git a/include/zephyr/arch/arm64/hypercall.h b/include/zephyr/arch/arm64/hypercall.h index 15ef3b33b2c88..df84b70f508c5 100644 --- a/include/zephyr/arch/arm64/hypercall.h +++ b/include/zephyr/arch/arm64/hypercall.h @@ -6,6 +6,8 @@ #ifndef ZEPHYR_INCLUDE_ARCH_ARM64_HYPERCALL_H_ #define ZEPHYR_INCLUDE_ARCH_ARM64_HYPERCALL_H_ +#include + /* defined in hypercall.S by HYPERCALL(hypercall) */ int HYPERVISOR_console_io(int op, int cnt, char *str); int HYPERVISOR_sched_op(int op, void *param); @@ -13,6 +15,7 @@ int HYPERVISOR_event_channel_op(int op, void *param); int HYPERVISOR_hvm_op(int op, void *param); int HYPERVISOR_memory_op(int op, void *param); int HYPERVISOR_grant_table_op(int op, void *uop, unsigned int count); +int HYPERVISOR_dm_op(domid_t domid, unsigned int nr_bufs, struct xen_dm_op_buf *bufs); int HYPERVISOR_xen_version(int op, void *param); #ifdef CONFIG_XEN_DOM0 diff --git a/include/zephyr/xen/dmop.h b/include/zephyr/xen/dmop.h new file mode 100644 index 0000000000000..78d4f15a457ce --- /dev/null +++ b/include/zephyr/xen/dmop.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2025 TOKITA Hiroshi + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef ZEPHYR_XEN_DMOP_H_ +#define ZEPHYR_XEN_DMOP_H_ + +#include + +/** + * @brief Create an I/O request server in the given Xen domain. + * + * This function issues the XEN_DMOP_create_ioreq_server hypercall to create + * a server that handles I/O requests on behalf of the guest domain. + * + * @param domid Xen domain identifier where the server is created. + * @param handle_bufioreq Flag indicating whether buffered I/O requests should be handled. + * Set to non-zero to enable buffered handling. + * @param id Output pointer to receive the newly created server ID. + * + * @return 0 on success, or a negative errno code on failure. + */ +int dmop_create_ioreq_server(domid_t domid, uint8_t handle_bufioreq, ioservid_t *id); + +/** + * @brief Destroy a previously created I/O request server. + * + * Issues the XEN_DMOP_destroy_ioreq_server hypercall to tear down the + * specified I/O request server. + * + * @param domid Xen domain identifier where the server exists. + * @param id I/O request server ID returned by dmop_create_ioreq_server(). + * + * @return 0 on success, or a negative errno code on failure. + */ +int dmop_destroy_ioreq_server(domid_t domid, ioservid_t id); + +/** + * @brief Map a specified I/O address range to an existing I/O request server. + * + * This function issues the XEN_DMOP_map_io_range_to_ioreq_server hypercall to grant + * access to the given I/O address range for the specified server. + * + * @param domid Xen domain identifier where the mapping is applied. + * @param id I/O request server ID returned by dmop_create_ioreq_server(). + * @param type Type identifier for the I/O range (e.g., MMIO, port I/O). + * @param start Start physical address of the I/O range. + * @param end End physical address (inclusive) of the I/O range. + * + * @return 0 on success, or a negative errno code on failure. 
+ */ +int dmop_map_io_range_to_ioreq_server(domid_t domid, ioservid_t id, uint32_t type, uint64_t start, + uint64_t end); + +/** + * @brief Unmap an I/O address range from an I/O request server. + * + * Issues the XEN_DMOP_unmap_io_range_from_ioreq_server hypercall to revoke + * access to a previously mapped I/O address range. + * + * @param domid Xen domain identifier where the unmapping is applied. + * @param id I/O request server ID returned by dmop_create_ioreq_server(). + * @param type Type identifier for the I/O range (e.g., MMIO, port I/O). + * @param start Start physical address of the I/O range. + * @param end End physical address (inclusive) of the I/O range. + * + * @return 0 on success, or a negative errno code on failure. + */ +int dmop_unmap_io_range_from_ioreq_server(domid_t domid, ioservid_t id, uint32_t type, + uint64_t start, uint64_t end); + +/** + * @brief Enable or disable an existing I/O request server. + * + * This function issues the XEN_DMOP_set_ioreq_server_state hypercall to change + * the operational state of the specified I/O request server. + * + * @param domid Xen domain identifier. + * @param id I/O request server ID to modify. + * @param enabled Non-zero to enable the server, zero to disable it. + * + * @return 0 on success, or a negative errno code on failure. + */ +int dmop_set_ioreq_server_state(domid_t domid, ioservid_t id, uint8_t enabled); + +/** + * @brief Query the number of virtual CPUs in a Xen domain. + * + * This function issues the XEN_DMOP_nr_vcpus hypercall to retrieve + * the current vCPU count for the specified domain. + * + * @param domid Xen domain identifier to query. + * + * @return The number of vCPUs on success, or a negative errno code on failure. + */ +int dmop_nr_vcpus(domid_t domid); + +/** + * @brief Set the interrupt level for a specific IRQ in a Xen domain. + * + * This function issues the XEN_DMOP_set_irq_level hypercall to adjust + * the signal level (assert or deassert) for the given IRQ line. + * + * @param domid Xen domain identifier. + * @param irq IRQ number whose level is to be set. + * @param level Non-zero to assert (raise) the IRQ, zero to deassert (lower) it. + * + * @return 0 on success, or a negative errno code on failure. + */ +int dmop_set_irq_level(domid_t domid, uint32_t irq, uint8_t level); + +#endif /* ZEPHYR_XEN_DMOP_H_ */ diff --git a/include/zephyr/xen/public/hvm/dm_op.h b/include/zephyr/xen/public/hvm/dm_op.h new file mode 100644 index 0000000000000..8439013f8a4a2 --- /dev/null +++ b/include/zephyr/xen/public/hvm/dm_op.h @@ -0,0 +1,503 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright (c) 2016, Citrix Systems Inc + * + */ + +#ifndef __XEN_PUBLIC_HVM_DM_OP_H__ +#define __XEN_PUBLIC_HVM_DM_OP_H__ + +#include "../xen.h" +#include "../event_channel.h" + +#ifndef uint64_aligned_t +#define uint64_aligned_t uint64_t +#endif + +/* + * IOREQ Servers + * + * The interface between an I/O emulator an Xen is called an IOREQ Server. + * A domain supports a single 'legacy' IOREQ Server which is instantiated if + * parameter... + * + * HVM_PARAM_IOREQ_PFN is read (to get the gfn containing the synchronous + * ioreq structures), or... + * HVM_PARAM_BUFIOREQ_PFN is read (to get the gfn containing the buffered + * ioreq ring), or... + * HVM_PARAM_BUFIOREQ_EVTCHN is read (to get the event channel that Xen uses + * to request buffered I/O emulation). 
+ * + * The following hypercalls facilitate the creation of IOREQ Servers for + * 'secondary' emulators which are invoked to implement port I/O, memory, or + * PCI config space ranges which they explicitly register. + */ + +typedef uint16_t ioservid_t; + +/* + * XEN_DMOP_create_ioreq_server: Instantiate a new IOREQ Server for a + * secondary emulator. + * + * The handed back is unique for target domain. The valur of + * should be one of HVM_IOREQSRV_BUFIOREQ_* defined in + * hvm_op.h. If the value is HVM_IOREQSRV_BUFIOREQ_OFF then the buffered + * ioreq ring will not be allocated and hence all emulation requests to + * this server will be synchronous. + */ +#define XEN_DMOP_create_ioreq_server 1 + +struct xen_dm_op_create_ioreq_server { + /* IN - should server handle buffered ioreqs */ + uint8_t handle_bufioreq; + uint8_t pad[3]; + /* OUT - server id */ + ioservid_t id; +}; +typedef struct xen_dm_op_create_ioreq_server xen_dm_op_create_ioreq_server_t; + +/* + * XEN_DMOP_get_ioreq_server_info: Get all the information necessary to + * access IOREQ Server . + * + * If the IOREQ Server is handling buffered emulation requests, the + * emulator needs to bind to event channel to listen for + * them. (The event channels used for synchronous emulation requests are + * specified in the per-CPU ioreq structures). + * In addition, if the XENMEM_acquire_resource memory op cannot be used, + * the emulator will need to map the synchronous ioreq structures and + * buffered ioreq ring (if it exists) from guest memory. If does + * not contain XEN_DMOP_no_gfns then these pages will be made available and + * the frame numbers passed back in gfns and + * respectively. (If the IOREQ Server is not handling buffered emulation + * only will be valid). + * + * NOTE: To access the synchronous ioreq structures and buffered ioreq + * ring, it is preferable to use the XENMEM_acquire_resource memory + * op specifying resource type XENMEM_resource_ioreq_server. + */ +#define XEN_DMOP_get_ioreq_server_info 2 + +struct xen_dm_op_get_ioreq_server_info { + /* IN - server id */ + ioservid_t id; + /* IN - flags */ + uint16_t flags; + +#define _XEN_DMOP_no_gfns 0 +#define XEN_DMOP_no_gfns (1u << _XEN_DMOP_no_gfns) + + /* OUT - buffered ioreq port */ + evtchn_port_t bufioreq_port; + /* OUT - sync ioreq gfn (see block comment above) */ + uint64_aligned_t ioreq_gfn; + /* OUT - buffered ioreq gfn (see block comment above)*/ + uint64_aligned_t bufioreq_gfn; +}; +typedef struct xen_dm_op_get_ioreq_server_info xen_dm_op_get_ioreq_server_info_t; + +/* + * XEN_DMOP_map_io_range_to_ioreq_server: Register an I/O range for + * emulation by the client of + * IOREQ Server . + * XEN_DMOP_unmap_io_range_from_ioreq_server: Deregister an I/O range + * previously registered for + * emulation by the client of + * IOREQ Server . + * + * There are three types of I/O that can be emulated: port I/O, memory + * accesses and PCI config space accesses. The field denotes which + * type of range* the and (inclusive) fields are specifying. + * PCI config space ranges are specified by segment/bus/device/function + * values which should be encoded using the DMOP_PCI_SBDF helper macro + * below. + * + * NOTE: unless an emulation request falls entirely within a range mapped + * by a secondary emulator, it will not be passed to that emulator. 
+ */ +#define XEN_DMOP_map_io_range_to_ioreq_server 3 +#define XEN_DMOP_unmap_io_range_from_ioreq_server 4 + +struct xen_dm_op_ioreq_server_range { + /* IN - server id */ + ioservid_t id; + uint16_t pad; + /* IN - type of range */ + uint32_t type; +#define XEN_DMOP_IO_RANGE_PORT 0 /* I/O port range */ +#define XEN_DMOP_IO_RANGE_MEMORY 1 /* MMIO range */ +#define XEN_DMOP_IO_RANGE_PCI 2 /* PCI segment/bus/dev/func range */ + /* IN - inclusive start and end of range */ + uint64_aligned_t start, end; +}; +typedef struct xen_dm_op_ioreq_server_range xen_dm_op_ioreq_server_range_t; + +#define XEN_DMOP_PCI_SBDF(s, b, d, f) \ + ((((s) & 0xffff) << 16) | (((b) & 0xff) << 8) | (((d) & 0x1f) << 3) | ((f) & 0x07)) + +/* + * XEN_DMOP_set_ioreq_server_state: Enable or disable the IOREQ Server + * + * The IOREQ Server will not be passed any emulation requests until it is + * in the enabled state. + * Note that the contents of the ioreq_gfn and bufioreq_gfn (see + * XEN_DMOP_get_ioreq_server_info) are not meaningful until the IOREQ Server + * is in the enabled state. + */ +#define XEN_DMOP_set_ioreq_server_state 5 + +struct xen_dm_op_set_ioreq_server_state { + /* IN - server id */ + ioservid_t id; + /* IN - enabled? */ + uint8_t enabled; + uint8_t pad; +}; +typedef struct xen_dm_op_set_ioreq_server_state xen_dm_op_set_ioreq_server_state_t; + +/* + * XEN_DMOP_destroy_ioreq_server: Destroy the IOREQ Server . + * + * Any registered I/O ranges will be automatically deregistered. + */ +#define XEN_DMOP_destroy_ioreq_server 6 + +struct xen_dm_op_destroy_ioreq_server { + /* IN - server id */ + ioservid_t id; + uint16_t pad; +}; +typedef struct xen_dm_op_destroy_ioreq_server xen_dm_op_destroy_ioreq_server_t; + +/* + * XEN_DMOP_track_dirty_vram: Track modifications to the specified pfn + * range. + * + * NOTE: The bitmap passed back to the caller is passed in a + * secondary buffer. + */ +#define XEN_DMOP_track_dirty_vram 7 + +struct xen_dm_op_track_dirty_vram { + /* IN - number of pages to be tracked */ + uint32_t nr; + uint32_t pad; + /* IN - first pfn to track */ + uint64_aligned_t first_pfn; +}; +typedef struct xen_dm_op_track_dirty_vram xen_dm_op_track_dirty_vram_t; + +/* + * XEN_DMOP_set_pci_intx_level: Set the logical level of one of a domain's + * PCI INTx pins. + */ +#define XEN_DMOP_set_pci_intx_level 8 + +struct xen_dm_op_set_pci_intx_level { + /* IN - PCI INTx identification (domain:bus:device:intx) */ + uint16_t domain; + uint8_t bus, device, intx; + /* IN - Level: 0 -> deasserted, 1 -> asserted */ + uint8_t level; +}; +typedef struct xen_dm_op_set_pci_intx_level xen_dm_op_set_pci_intx_level_t; + +/* + * XEN_DMOP_set_isa_irq_level: Set the logical level of a one of a domain's + * ISA IRQ lines. + */ +#define XEN_DMOP_set_isa_irq_level 9 + +struct xen_dm_op_set_isa_irq_level { + /* IN - ISA IRQ (0-15) */ + uint8_t isa_irq; + /* IN - Level: 0 -> deasserted, 1 -> asserted */ + uint8_t level; +}; +typedef struct xen_dm_op_set_isa_irq_level xen_dm_op_set_isa_irq_level_t; + +/* + * XEN_DMOP_set_pci_link_route: Map a PCI INTx line to an IRQ line. + */ +#define XEN_DMOP_set_pci_link_route 10 + +struct xen_dm_op_set_pci_link_route { + /* PCI INTx line (0-3) */ + uint8_t link; + /* ISA IRQ (1-15) or 0 -> disable link */ + uint8_t isa_irq; +}; +typedef struct xen_dm_op_set_pci_link_route xen_dm_op_set_pci_link_route_t; + +/* + * XEN_DMOP_modified_memory: Notify that a set of pages were modified by + * an emulator. 
+ * + * DMOP buf 1 contains an array of xen_dm_op_modified_memory_extent with + * @nr_extents entries. + * + * On error, @nr_extents will contain the index+1 of the extent that + * had the error. It is not defined if or which pages may have been + * marked as dirty, in this event. + */ +#define XEN_DMOP_modified_memory 11 + +struct xen_dm_op_modified_memory { + /* + * IN - Number of extents to be processed + * OUT -returns n+1 for failing extent + */ + uint32_t nr_extents; + /* IN/OUT - Must be set to 0 */ + uint32_t opaque; +}; +typedef struct xen_dm_op_modified_memory xen_dm_op_modified_memory_t; + +struct xen_dm_op_modified_memory_extent { + /* IN - number of contiguous pages modified */ + uint32_t nr; + uint32_t pad; + /* IN - first pfn modified */ + uint64_aligned_t first_pfn; +}; + +/* + * XEN_DMOP_set_mem_type: Notify that a region of memory is to be treated + * in a specific way. (See definition of + * hvmmem_type_t). + * + * NOTE: In the event of a continuation (return code -ERESTART), the + * @first_pfn is set to the value of the pfn of the remaining + * region and @nr reduced to the size of the remaining region. + */ +#define XEN_DMOP_set_mem_type 12 + +struct xen_dm_op_set_mem_type { + /* IN - number of contiguous pages */ + uint32_t nr; + /* IN - new hvmmem_type_t of region */ + uint16_t mem_type; + uint16_t pad; + /* IN - first pfn in region */ + uint64_aligned_t first_pfn; +}; +typedef struct xen_dm_op_set_mem_type xen_dm_op_set_mem_type_t; + +/* + * XEN_DMOP_inject_event: Inject an event into a VCPU, which will + * get taken up when it is next scheduled. + * + * Note that the caller should know enough of the state of the CPU before + * injecting, to know what the effect of injecting the event will be. + */ +#define XEN_DMOP_inject_event 13 + +struct xen_dm_op_inject_event { + /* IN - index of vCPU */ + uint32_t vcpuid; + /* IN - interrupt vector */ + uint8_t vector; + /* IN - event type (DMOP_EVENT_* ) */ + uint8_t type; +/* NB. This enumeration precisely matches hvm.h:X86_EVENTTYPE_* */ +#define XEN_DMOP_EVENT_ext_int 0 /* external interrupt */ +#define XEN_DMOP_EVENT_nmi 2 /* nmi */ +#define XEN_DMOP_EVENT_hw_exc 3 /* hardware exception */ +#define XEN_DMOP_EVENT_sw_int 4 /* software interrupt (CD nn) */ +#define XEN_DMOP_EVENT_pri_sw_exc 5 /* ICEBP (F1) */ +#define XEN_DMOP_EVENT_sw_exc 6 /* INT3 (CC), INTO (CE) */ + /* IN - instruction length */ + uint8_t insn_len; + uint8_t pad0; + /* IN - error code (or ~0 to skip) */ + uint32_t error_code; + uint32_t pad1; + /* IN - type-specific extra data (%cr2 for #PF, pending_dbg for #DB) */ + uint64_aligned_t cr2; +}; +typedef struct xen_dm_op_inject_event xen_dm_op_inject_event_t; + +/* + * XEN_DMOP_inject_msi: Inject an MSI for an emulated device. + */ +#define XEN_DMOP_inject_msi 14 + +struct xen_dm_op_inject_msi { + /* IN - MSI data (lower 32 bits) */ + uint32_t data; + uint32_t pad; + /* IN - MSI address (0xfeexxxxx) */ + uint64_aligned_t addr; +}; +typedef struct xen_dm_op_inject_msi xen_dm_op_inject_msi_t; + +/* + * XEN_DMOP_map_mem_type_to_ioreq_server : map or unmap the IOREQ Server + * to specific memory type + * for specific accesses + * + * For now, flags only accept the value of XEN_DMOP_IOREQ_MEM_ACCESS_WRITE, + * which means only write operations are to be forwarded to an ioreq server. + * Support for the emulation of read operations can be added when an ioreq + * server has such requirement in future. 
+ */ +#define XEN_DMOP_map_mem_type_to_ioreq_server 15 + +struct xen_dm_op_map_mem_type_to_ioreq_server { + ioservid_t id; /* IN - ioreq server id */ + uint16_t type; /* IN - memory type */ + uint32_t flags; /* IN - types of accesses to be forwarded to the */ + /* ioreq server. flags with 0 means to unmap the */ + /* ioreq server */ + +#define XEN_DMOP_IOREQ_MEM_ACCESS_READ (1u << 0) +#define XEN_DMOP_IOREQ_MEM_ACCESS_WRITE (1u << 1) + + uint64_t opaque; /* IN/OUT - only used for hypercall continuation, */ + /* has to be set to zero by the caller */ +}; +typedef struct xen_dm_op_map_mem_type_to_ioreq_server xen_dm_op_map_mem_type_to_ioreq_server_t; + +/* + * XEN_DMOP_remote_shutdown : Declare a shutdown for another domain + * Identical to SCHEDOP_remote_shutdown + */ +#define XEN_DMOP_remote_shutdown 16 + +struct xen_dm_op_remote_shutdown { + uint32_t reason; /* SHUTDOWN_* => enum sched_shutdown_reason */ + /* (Other reason values are not blocked) */ +}; +typedef struct xen_dm_op_remote_shutdown xen_dm_op_remote_shutdown_t; + +/* + * XEN_DMOP_relocate_memory : Relocate GFNs for the specified guest. + * Identical to XENMEM_add_to_physmap with + * space == XENMAPSPACE_gmfn_range. + */ +#define XEN_DMOP_relocate_memory 17 + +struct xen_dm_op_relocate_memory { + /* All fields are IN/OUT, with their OUT state undefined. */ + /* Number of GFNs to process. */ + uint32_t size; + uint32_t pad; + /* Starting GFN to relocate. */ + uint64_aligned_t src_gfn; + /* Starting GFN where GFNs should be relocated. */ + uint64_aligned_t dst_gfn; +}; +typedef struct xen_dm_op_relocate_memory xen_dm_op_relocate_memory_t; + +/* + * XEN_DMOP_pin_memory_cacheattr : Pin caching type of RAM space. + * Identical to XEN_DOMCTL_pin_mem_cacheattr. + */ +#define XEN_DMOP_pin_memory_cacheattr 18 + +struct xen_dm_op_pin_memory_cacheattr { + uint64_aligned_t start; /* Start gfn. */ + uint64_aligned_t end; /* End gfn. */ +/* Caching types: these happen to be the same as x86 MTRR/PAT type codes. */ +#define XEN_DMOP_MEM_CACHEATTR_UC 0 +#define XEN_DMOP_MEM_CACHEATTR_WC 1 +#define XEN_DMOP_MEM_CACHEATTR_WT 4 +#define XEN_DMOP_MEM_CACHEATTR_WP 5 +#define XEN_DMOP_MEM_CACHEATTR_WB 6 +#define XEN_DMOP_MEM_CACHEATTR_UCM 7 +#define XEN_DMOP_DELETE_MEM_CACHEATTR (~(uint32_t)0) + uint32_t type; /* XEN_DMOP_MEM_CACHEATTR_* */ + uint32_t pad; +}; +typedef struct xen_dm_op_pin_memory_cacheattr xen_dm_op_pin_memory_cacheattr_t; + +/* + * XEN_DMOP_set_irq_level: Set the logical level of a one of a domain's + * IRQ lines (currently Arm only). + * Only SPIs are supported. + */ +#define XEN_DMOP_set_irq_level 19 + +struct xen_dm_op_set_irq_level { + uint32_t irq; + /* IN - Level: 0 -> deasserted, 1 -> asserted */ + uint8_t level; + uint8_t pad[3]; +}; +typedef struct xen_dm_op_set_irq_level xen_dm_op_set_irq_level_t; + +/* + * XEN_DMOP_nr_vcpus: Query the number of vCPUs a domain has. + * + * This is the number of vcpu objects allocated in Xen for the domain, and is + * fixed from creation time. This bound is applicable to e.g. the vcpuid + * parameter of XEN_DMOP_inject_event, or number of struct ioreq objects + * mapped via XENMEM_acquire_resource. 
+ */ +#define XEN_DMOP_nr_vcpus 20 + +struct xen_dm_op_nr_vcpus { + uint32_t vcpus; /* OUT */ +}; +typedef struct xen_dm_op_nr_vcpus xen_dm_op_nr_vcpus_t; + +struct xen_dm_op { + uint32_t op; + uint32_t pad; + union { + xen_dm_op_create_ioreq_server_t create_ioreq_server; + xen_dm_op_get_ioreq_server_info_t get_ioreq_server_info; + xen_dm_op_ioreq_server_range_t map_io_range_to_ioreq_server; + xen_dm_op_ioreq_server_range_t unmap_io_range_from_ioreq_server; + xen_dm_op_set_ioreq_server_state_t set_ioreq_server_state; + xen_dm_op_destroy_ioreq_server_t destroy_ioreq_server; + xen_dm_op_track_dirty_vram_t track_dirty_vram; + xen_dm_op_set_pci_intx_level_t set_pci_intx_level; + xen_dm_op_set_isa_irq_level_t set_isa_irq_level; + xen_dm_op_set_irq_level_t set_irq_level; + xen_dm_op_set_pci_link_route_t set_pci_link_route; + xen_dm_op_modified_memory_t modified_memory; + xen_dm_op_set_mem_type_t set_mem_type; + xen_dm_op_inject_event_t inject_event; + xen_dm_op_inject_msi_t inject_msi; + xen_dm_op_map_mem_type_to_ioreq_server_t map_mem_type_to_ioreq_server; + xen_dm_op_remote_shutdown_t remote_shutdown; + xen_dm_op_relocate_memory_t relocate_memory; + xen_dm_op_pin_memory_cacheattr_t pin_memory_cacheattr; + xen_dm_op_nr_vcpus_t nr_vcpus; + } u; +}; + +struct xen_dm_op_buf { + XEN_GUEST_HANDLE(void) h; + xen_ulong_t size; +}; +typedef struct xen_dm_op_buf xen_dm_op_buf_t; +DEFINE_XEN_GUEST_HANDLE(xen_dm_op_buf_t); + +/* ` enum neg_errnoval + * ` HYPERVISOR_dm_op(domid_t domid, + * ` unsigned int nr_bufs, + * ` xen_dm_op_buf_t bufs[]) + * ` + * + * @domid is the domain the hypercall operates on. + * @nr_bufs is the number of buffers in the @bufs array. + * @bufs points to an array of buffers where @bufs[0] contains a struct + * xen_dm_op, describing the specific device model operation and its + * parameters. + * @bufs[1..] may be referenced in the parameters for the purposes of + * passing extra information to or from the domain. + */ + +#endif /* __XEN_PUBLIC_HVM_DM_OP_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/include/zephyr/xen/public/hvm/hvm_op.h b/include/zephyr/xen/public/hvm/hvm_op.h index 89a63dc1291ef..f7d2cf545699a 100644 --- a/include/zephyr/xen/public/hvm/hvm_op.h +++ b/include/zephyr/xen/public/hvm/hvm_op.h @@ -39,4 +39,17 @@ struct xen_hvm_param { typedef struct xen_hvm_param xen_hvm_param_t; DEFINE_XEN_GUEST_HANDLE(xen_hvm_param_t); +/* + * Definitions relating to DMOP_create_ioreq_server. (Defined here for + * backwards compatibility). + */ + +#define HVM_IOREQSRV_BUFIOREQ_OFF 0 +#define HVM_IOREQSRV_BUFIOREQ_LEGACY 1 +/* + * Use this when read_pointer gets updated atomically and + * the pointer pair gets read atomically: + */ +#define HVM_IOREQSRV_BUFIOREQ_ATOMIC 2 + #endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */ diff --git a/include/zephyr/xen/public/hvm/ioreq.h b/include/zephyr/xen/public/hvm/ioreq.h new file mode 100644 index 0000000000000..7680978345b17 --- /dev/null +++ b/include/zephyr/xen/public/hvm/ioreq.h @@ -0,0 +1,125 @@ +/* SPDX-License-Identifier: MIT */ +/* + * ioreq.h: I/O request definitions for device models + * Copyright (c) 2004, Intel Corporation. 
+ */ + +#ifndef _IOREQ_H_ +#define _IOREQ_H_ + +#define IOREQ_READ 1 +#define IOREQ_WRITE 0 + +#define STATE_IOREQ_NONE 0 +#define STATE_IOREQ_READY 1 +#define STATE_IOREQ_INPROCESS 2 +#define STATE_IORESP_READY 3 + +#define IOREQ_TYPE_PIO 0 /* pio */ +#define IOREQ_TYPE_COPY 1 /* mmio ops */ +#define IOREQ_TYPE_PCI_CONFIG 2 +#define IOREQ_TYPE_TIMEOFFSET 7 +#define IOREQ_TYPE_INVALIDATE 8 /* mapcache */ + +/* + * VMExit dispatcher should cooperate with instruction decoder to + * prepare this structure and notify service OS and DM by sending + * virq. + * + * For I/O type IOREQ_TYPE_PCI_CONFIG, the physical address is formatted + * as follows: + * + * 63....48|47..40|39..35|34..32|31........0 + * SEGMENT |BUS |DEV |FN |OFFSET + */ +struct ioreq { + uint64_t addr; /* physical address */ + uint64_t data; /* data (or paddr of data) */ + uint32_t count; /* for rep prefixes */ + uint32_t size; /* size in bytes */ + uint32_t vp_eport; /* evtchn for notifications to/from device model */ + uint16_t _pad0; + uint8_t state: 4; + uint8_t data_is_ptr: 1; /* if 1, data above is the guest paddr */ + /* of the real data to use. */ + uint8_t dir: 1; /* 1=read, 0=write */ + uint8_t df: 1; + uint8_t _pad1: 1; + uint8_t type; /* I/O type */ +}; +typedef struct ioreq ioreq_t; + +struct shared_iopage { + struct ioreq vcpu_ioreq[1]; +}; +typedef struct shared_iopage shared_iopage_t; + +struct buf_ioreq { + uint8_t type; /* I/O type */ + uint8_t pad: 1; + uint8_t dir: 1; /* 1=read, 0=write */ + uint8_t size: 2; /* 0=>1, 1=>2, 2=>4, 3=>8. If 8, use two buf_ioreqs */ + uint32_t addr: 20; /* physical address */ + uint32_t data; /* data */ +}; +typedef struct buf_ioreq buf_ioreq_t; + +#define IOREQ_BUFFER_SLOT_NUM 511 /* 8 bytes each, plus 2 4-byte indexes */ +struct buffered_iopage { +#ifdef __XEN__ + union bufioreq_pointers { + struct { +#endif + uint32_t read_pointer; + uint32_t write_pointer; +#ifdef __XEN__ + }; + uint64_t full; + } ptrs; +#endif + buf_ioreq_t buf_ioreq[IOREQ_BUFFER_SLOT_NUM]; +}; /* NB. Size of this structure must be no greater than one page. */ +typedef struct buffered_iopage buffered_iopage_t; + +/* + * ACPI Control/Event register locations. Location is controlled by a + * version number in HVM_PARAM_ACPI_IOPORTS_LOCATION. + */ + +/* + * Version 0 (default): Traditional (obsolete) Xen locations. + * + * These are now only used for compatibility with VMs migrated + * from older Xen versions. + */ +#define ACPI_PM1A_EVT_BLK_ADDRESS_V0 0x1f40 +#define ACPI_PM1A_CNT_BLK_ADDRESS_V0 (ACPI_PM1A_EVT_BLK_ADDRESS_V0 + 0x04) +#define ACPI_PM_TMR_BLK_ADDRESS_V0 (ACPI_PM1A_EVT_BLK_ADDRESS_V0 + 0x08) +#define ACPI_GPE0_BLK_ADDRESS_V0 (ACPI_PM_TMR_BLK_ADDRESS_V0 + 0x20) +#define ACPI_GPE0_BLK_LEN_V0 0x08 + +/* Version 1: Locations preferred by modern Qemu (including Qemu-trad). */ +#define ACPI_PM1A_EVT_BLK_ADDRESS_V1 0xb000 +#define ACPI_PM1A_CNT_BLK_ADDRESS_V1 (ACPI_PM1A_EVT_BLK_ADDRESS_V1 + 0x04) +#define ACPI_PM_TMR_BLK_ADDRESS_V1 (ACPI_PM1A_EVT_BLK_ADDRESS_V1 + 0x08) +#define ACPI_GPE0_BLK_ADDRESS_V1 0xafe0 +#define ACPI_GPE0_BLK_LEN_V1 0x04 + +/* Compatibility definitions for the default location (version 0). 
*/ +#define ACPI_PM1A_EVT_BLK_ADDRESS ACPI_PM1A_EVT_BLK_ADDRESS_V0 +#define ACPI_PM1A_CNT_BLK_ADDRESS ACPI_PM1A_CNT_BLK_ADDRESS_V0 +#define ACPI_PM_TMR_BLK_ADDRESS ACPI_PM_TMR_BLK_ADDRESS_V0 +#define ACPI_GPE0_BLK_ADDRESS ACPI_GPE0_BLK_ADDRESS_V0 +#define ACPI_GPE0_BLK_LEN ACPI_GPE0_BLK_LEN_V0 + +#endif /* _IOREQ_H_ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ From f4256f35dc548c885fb78368f801407a44e98118 Mon Sep 17 00:00:00 2001 From: TOKITA Hiroshi Date: Sat, 14 Jun 2025 01:04:44 +0900 Subject: [PATCH 10/14] [DNM] manifest: add `zephyr-xenlib` module Import `zephyr-xenlib` module. That contains xen public headers. Signed-off-by: TOKITA Hiroshi --- west.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/west.yml b/west.yml index a5154435a3cdf..873ad9dd33ab8 100644 --- a/west.yml +++ b/west.yml @@ -23,6 +23,8 @@ manifest: url-base: https://github.com/zephyrproject-rtos - name: babblesim url-base: https://github.com/BabbleSim + - name: soburi + url-base: https://github.com/soburi group-filter: [-babblesim, -optional] @@ -379,6 +381,10 @@ manifest: - name: zcbor revision: 9b07780aca6fb21f82a241ba386ad9b379809337 path: modules/lib/zcbor + - name: zephyr-xenlib + revision: xs-client + path: modules/lib/zephyr-xenlib + remote: soburi # zephyr-keep-sorted-stop self: From 32e0fd037fff472dea8621a7619f6e345f80d14b Mon Sep 17 00:00:00 2001 From: TOKITA Hiroshi Date: Tue, 22 Jul 2025 08:24:40 +0900 Subject: [PATCH 11/14] drivers: Introduce vhost driver subsystem This introduces the vhost driver framework, providing standard APIs for VIRTIO backend implementations in Zephyr. Includes vringh utility for host-side VIRTIO ring processing based on Linux kernel implementation. 
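For illustration, a minimal backend wiring on top of these APIs could look
roughly like the sketch below (the vhost device handle, queue id 0 and the
empty kick handler are placeholders; error handling omitted):

    #include <zephyr/drivers/vhost.h>
    #include <zephyr/drivers/vhost/vringh.h>

    static struct vringh vrh;

    /* A real handler would pop descriptor chains with vringh_getdesc(),
     * finish them with vringh_complete() and call vringh_notify() when a
     * guest interrupt is required.
     */
    static void kick(struct vringh *v)
    {
            ARG_UNUSED(v);
    }

    int backend_start(const struct device *vhost_dev)
    {
            return vringh_init_device(&vrh, vhost_dev, 0, kick);
    }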
Signed-off-by: TOKITA Hiroshi --- drivers/CMakeLists.txt | 1 + drivers/Kconfig | 1 + drivers/vhost/CMakeLists.txt | 6 + drivers/vhost/Kconfig | 15 ++ drivers/vhost/vringh.c | 327 ++++++++++++++++++++++++++ include/zephyr/drivers/vhost.h | 260 ++++++++++++++++++++ include/zephyr/drivers/vhost/vringh.h | 250 ++++++++++++++++++++ 7 files changed, 860 insertions(+) create mode 100644 drivers/vhost/CMakeLists.txt create mode 100644 drivers/vhost/Kconfig create mode 100644 drivers/vhost/vringh.c create mode 100644 include/zephyr/drivers/vhost.h create mode 100644 include/zephyr/drivers/vhost/vringh.h diff --git a/drivers/CMakeLists.txt b/drivers/CMakeLists.txt index b3275816f472f..3005ae64da82b 100644 --- a/drivers/CMakeLists.txt +++ b/drivers/CMakeLists.txt @@ -93,6 +93,7 @@ add_subdirectory_ifdef(CONFIG_STEPPER stepper) add_subdirectory_ifdef(CONFIG_SYSCON syscon) add_subdirectory_ifdef(CONFIG_SYS_CLOCK_EXISTS timer) add_subdirectory_ifdef(CONFIG_TEE tee) +add_subdirectory_ifdef(CONFIG_VHOST vhost) add_subdirectory_ifdef(CONFIG_VIDEO video) add_subdirectory_ifdef(CONFIG_VIRTIO virtio) add_subdirectory_ifdef(CONFIG_VIRTUALIZATION virtualization) diff --git a/drivers/Kconfig b/drivers/Kconfig index 4dc5a629677c7..8289483bdcb11 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -92,6 +92,7 @@ source "drivers/syscon/Kconfig" source "drivers/timer/Kconfig" source "drivers/usb/Kconfig" source "drivers/usb_c/Kconfig" +source "drivers/vhost/Kconfig" source "drivers/video/Kconfig" source "drivers/virtio/Kconfig" source "drivers/virtualization/Kconfig" diff --git a/drivers/vhost/CMakeLists.txt b/drivers/vhost/CMakeLists.txt new file mode 100644 index 0000000000000..43e28601f9b43 --- /dev/null +++ b/drivers/vhost/CMakeLists.txt @@ -0,0 +1,6 @@ +# Copyright (c) 2025 TOKITA Hiroshi +# SPDX-License-Identifier: Apache-2.0 + +zephyr_library() + +zephyr_library_sources_ifdef(CONFIG_VHOST vringh.c) diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig new file mode 100644 index 0000000000000..11b5c4116e930 --- /dev/null +++ b/drivers/vhost/Kconfig @@ -0,0 +1,15 @@ +# Copyright (c) 2025 TOKITA Hiroshi +# SPDX-License-Identifier: Apache-2.0 + +config VHOST + bool "support for VIRTIO" + help + Enable options for VIRTIO + +if VHOST + +endif # VIRTIO + +module = VHOST +module-str = VHOST +source "subsys/logging/Kconfig.template.log_config" diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c new file mode 100644 index 0000000000000..54c58c5c74251 --- /dev/null +++ b/drivers/vhost/vringh.c @@ -0,0 +1,327 @@ +/* + * Copyright (c) 2025 TOKITA Hiroshi + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include +#include +#include +#include + +LOG_MODULE_REGISTER(vhost_vringh); + +int vringh_init(struct vringh *vrh, uint64_t features, uint16_t num, bool weak_barriers, + struct virtq_desc *desc, struct virtq_avail *avail, struct virtq_used *used) +{ + if (!vrh || !desc || !avail || !used) { + return -EINVAL; + } + + memset(vrh, 0, sizeof(*vrh)); + + vrh->event_indices = false; /* not supported */ + vrh->weak_barriers = weak_barriers; + vrh->last_avail_idx = 0; + vrh->last_used_idx = 0; + vrh->completed = 0; + + vrh->vring.num = num; + vrh->vring.desc = desc; + vrh->vring.avail = avail; + vrh->vring.used = used; + + return 0; +} + +static void vringh_kick_callback(const struct device *dev, uint16_t queue_id, void *ptr) +{ + struct vringh *vrh = ptr; + + if (vrh->kick) { + vrh->kick(vrh); + } +} + +int vringh_init_device(struct vringh *vrh, const struct device *dev, uint16_t queue_id, + 
void (*kick_callback)(struct vringh *vrh)) +{ + uint64_t drv_feats; + void *parts[3]; + size_t q_num; + int ret; + + if (!vrh || !dev) { + return -EINVAL; + } + + ret = vhost_get_virtq(dev, queue_id, parts, &q_num); + if (ret < 0) { + LOG_ERR("vhost_get_virtq failed: %d", ret); + return ret; + } + + ret = vhost_get_driver_features(dev, &drv_feats); + if (ret < 0) { + LOG_ERR("vhost_get_driver_features failed: %d", ret); + return ret; + } + + ret = vringh_init(vrh, drv_feats, q_num, false, parts[0], parts[1], parts[2]); + if (ret < 0) { + LOG_ERR("vringh_init failed: %d", ret); + return ret; + } + + vrh->dev = dev; + vrh->queue_id = queue_id; + vrh->kick = kick_callback; + + ret = vhost_register_virtq_notify_cb(dev, queue_id, vringh_kick_callback, (void *)vrh); + if (ret < 0) { + LOG_ERR("vhost_set_notify_callback failed: %d", ret); + return ret; + } + + return 0; +} + +int vringh_getdesc(struct vringh *vrh, struct vringh_iov *riov, struct vringh_iov *wiov, + uint16_t *head_out) +{ + if (!vrh || !riov || !wiov || !head_out) { + return -EINVAL; + } + + barrier_dmem_fence_full(); + + k_spinlock_key_t key = k_spin_lock(&vrh->lock); + struct vring *vr = &vrh->vring; + const uint16_t avail_idx = sys_le16_to_cpu(vrh->vring.avail->idx); + const uint16_t slot = vrh->last_avail_idx % vr->num; + const uint16_t head = sys_le16_to_cpu(vr->avail->ring[slot]); + struct vhost_buf desc_ranges[vr->num]; + size_t filled_read = 0; + size_t filled_write = 0; + uint16_t idx = head; + size_t count = 0; + uint16_t flags; + int ret; + + if (vrh->last_avail_idx == avail_idx) { + k_spin_unlock(&vrh->lock, key); + return 0; + } + + if (head >= vrh->vring.num) { + k_spin_unlock(&vrh->lock, key); + LOG_ERR("Invalid descriptor head: %u >= %u", head, vrh->vring.num); + return -EINVAL; + } + + if (!vrh->event_indices) { + flags = sys_le16_to_cpu(vr->used->flags); + flags |= VIRTQ_USED_F_NO_NOTIFY; + vr->used->flags = sys_cpu_to_le16(flags); + } + + barrier_dmem_fence_full(); + + k_spin_unlock(&vrh->lock, key); + + vringh_iov_reset(riov); + vringh_iov_reset(wiov); + + do { + const struct virtq_desc *d = &vr->desc[idx]; + const uint64_t gpa = sys_le64_to_cpu(d->addr); + const uint32_t len = sys_le32_to_cpu(d->len); + const uint16_t next = sys_le16_to_cpu(d->next); + + flags = sys_le16_to_cpu(d->flags); + + if (count >= vr->num) { + LOG_ERR("Descriptor chain too long: %zu", count); + ret = -ENOMEM; + goto failed; + } + + /* Validate next descriptor index */ + if ((flags & VIRTQ_DESC_F_NEXT) && next >= vr->num) { + LOG_ERR("Invalid next descriptor: %u >= %u", next, vr->num); + ret = -EINVAL; + goto failed; + } + + if (len == 0) { + LOG_WRN("Zero-length descriptor at index %u", idx); + idx = next; + continue; + } + + /* Store descriptor information for Phase 2 */ + desc_ranges[count].gpa = gpa; + desc_ranges[count].len = len; + desc_ranges[count].is_write = !!(flags & VIRTQ_DESC_F_WRITE); + + count++; + idx = next; + } while (flags & VIRTQ_DESC_F_NEXT); + + ret = vhost_prepare_iovec(vrh->dev, vrh->queue_id, head, desc_ranges, count, riov->iov, + riov->max_num, wiov->iov, wiov->max_num, &filled_read, + &filled_write); + if (ret < 0) { + LOG_ERR("vhost_prepare_iovec failed: %d", ret); + goto failed; + } + + riov->used = filled_read; + wiov->used = filled_write; + + /* Success - update state and return */ + *head_out = head; + + key = k_spin_lock(&vrh->lock); + vrh->last_avail_idx++; + k_spin_unlock(&vrh->lock, key); + + return 1; + +failed: + if (count > 0) { + int rc = vhost_release_iovec(vrh->dev, vrh->queue_id, head); + + if 
(rc < 0) { + LOG_ERR("vhost_release_iovec failed: %d", rc); + vhost_set_device_status(vrh->dev, DEVICE_STATUS_FAILED); + } + } + + vringh_iov_reset(riov); + vringh_iov_reset(wiov); + + return ret; +} + +int vringh_complete(struct vringh *vrh, uint16_t head, uint32_t total_len) +{ + struct vring *vr = &vrh->vring; + int rc = 0; + + if (!vrh) { + return -EINVAL; + } + + rc = vhost_release_iovec(vrh->dev, vrh->queue_id, head); + if (rc < 0) { + LOG_ERR("vhost_release_iovec failed: %d", rc); + vhost_set_device_status(vrh->dev, DEVICE_STATUS_FAILED); + return rc; + } + + k_spinlock_key_t key = k_spin_lock(&vrh->lock); + + const uint16_t used_idx = sys_le16_to_cpu(vr->used->idx); + struct virtq_used_elem *ue = &vr->used->ring[used_idx % vr->num]; + + LOG_DBG("used_idx %u ue={%u, %u}", used_idx, head, total_len); + + ue->id = sys_cpu_to_le32((uint32_t)head); + ue->len = sys_cpu_to_le32(total_len); + + barrier_dmem_fence_full(); + vr->used->idx = sys_cpu_to_le16(used_idx + 1); + vrh->last_used_idx++; + + uint16_t flags = sys_le16_to_cpu(vr->used->flags); + + flags &= ~VIRTQ_USED_F_NO_NOTIFY; + vr->used->flags = sys_cpu_to_le16(flags); + barrier_dmem_fence_full(); + + k_spin_unlock(&vrh->lock, key); + + return rc; +} + +int vringh_abandon(struct vringh *vrh, uint32_t num) +{ + struct vring *vr = &vrh->vring; + int rc = 0; + + if (num == 0) { + return 0; + } + + if (!vrh) { + return -EINVAL; + } + + k_spinlock_key_t key = k_spin_lock(&vrh->lock); + + if (num <= vrh->last_avail_idx) { + vrh->last_avail_idx -= num; + LOG_DBG("Abandoned %u descs, new last_avail_idx: %u", num, vrh->last_avail_idx); + } else { + LOG_ERR("Cannot abandon %u descs, avail=%u", num, vrh->last_avail_idx); + rc = -ERANGE; + } + + uint16_t flags = sys_le16_to_cpu(vr->used->flags); + + flags &= ~VIRTQ_USED_F_NO_NOTIFY; + vr->used->flags = sys_cpu_to_le16(flags); + + barrier_dmem_fence_full(); + + k_spin_unlock(&vrh->lock, key); + + return rc; +} + +void vringh_iov_reset(struct vringh_iov *iov) +{ + if (!iov || !iov->iov) { + return; + } + + if (iov->consumed > 0 && iov->i < iov->used) { + iov->iov[iov->i].iov_len += iov->consumed; + iov->iov[iov->i].iov_base = (char *)iov->iov[iov->i].iov_base - iov->consumed; + } + + iov->consumed = 0; + iov->i = 0; + iov->used = 0; +} + +int vringh_need_notify(struct vringh *vrh) +{ + if (!vrh) { + return -EINVAL; + } + + k_spinlock_key_t key = k_spin_lock(&vrh->lock); + const uint16_t flags = sys_le16_to_cpu(vrh->vring.avail->flags); + + k_spin_unlock(&vrh->lock, key); + + return !(flags & VIRTQ_AVAIL_F_NO_INTERRUPT); +} + +void vringh_notify(struct vringh *vrh) +{ + k_spinlock_key_t key = k_spin_lock(&vrh->lock); + const uint16_t flags = sys_le16_to_cpu(vrh->vring.avail->flags); + + k_spin_unlock(&vrh->lock, key); + + if (flags & VIRTQ_AVAIL_F_NO_INTERRUPT) { + return; + } + + vhost_notify_virtq(vrh->dev, vrh->queue_id); +} diff --git a/include/zephyr/drivers/vhost.h b/include/zephyr/drivers/vhost.h new file mode 100644 index 0000000000000..56c9a7aad781f --- /dev/null +++ b/include/zephyr/drivers/vhost.h @@ -0,0 +1,260 @@ +/* + * Copyright (c) 2025 TOKITA Hiroshi + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef ZEPHYR_DRIVERS_VHOST_H_ +#define ZEPHYR_DRIVERS_VHOST_H_ + +/** + * @brief VHost API + * + * The VHost provides functions for VIRTIO device backends in + * a hypervisor environment. + * VHost backends handle guest VIRTIO requests and respond to them. 
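+ *
+ * A typical backend flow (illustrative, error handling omitted) is:
+ *
+ * 1. Register a queue-ready callback with vhost_register_virtq_ready_cb().
+ * 2. When a queue becomes ready, fetch its ring parts with vhost_get_virtq()
+ *    and the negotiated features with vhost_get_driver_features().
+ * 3. Register a per-queue notify callback with
+ *    vhost_register_virtq_notify_cb(); on notification, translate guest
+ *    buffers with vhost_prepare_iovec(), process them, and release them
+ *    with vhost_release_iovec().
+ * 4. Signal the guest with vhost_notify_virtq() once buffers are consumed.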
+ * + * @defgroup vhost_apis VHost Controller APIs + * @ingroup io_interfaces + * @{ + */ + +#include +#include + +/** + * Represents a memory buffer segment for VHost operations. + */ +struct vhost_iovec { + void *iov_base; + size_t iov_len; +}; + +/** + * Represents a guest physical address and length pair for VHost operations. + */ +struct vhost_buf { + uint64_t gpa; + size_t len; + bool is_write; +}; + +/** + * VHost controller API structure + */ +__subsystem struct vhost_controller_api { + int (*prepare_iovec)(const struct device *dev, uint16_t queue_id, uint16_t head, + const struct vhost_buf *bufs, size_t bufs_count, + struct vhost_iovec *read_iovec, size_t max_read_iovecs, + struct vhost_iovec *write_iovec, size_t max_write_iovecs, + size_t *read_count, size_t *write_count); + int (*release_iovec)(const struct device *dev, uint16_t queue_id, uint16_t head); + int (*get_virtq)(const struct device *dev, uint16_t queue_id, void **parts, + size_t *queue_size); + int (*get_driver_features)(const struct device *dev, uint64_t *drv_feats); + bool (*virtq_is_ready)(const struct device *dev, uint16_t queue_id); + int (*register_virtq_ready_cb)(const struct device *dev, + void (*callback)(const struct device *dev, uint16_t queue_id, + void *data), + void *data); + int (*register_virtq_notify_cb)(const struct device *dev, uint16_t queue_id, + void (*callback)(const struct device *dev, + uint16_t queue_id, void *data), + void *data); + int (*notify_virtq)(const struct device *dev, uint16_t queue_id); + int (*set_device_status)(const struct device *dev, uint32_t status); +}; + +/** + * @brief Prepare iovecs for virtq process + * + * Maps guest physical addresses to host virtual addresses for the given + * GPAs and fills the provided read and write iovec arrays. + * + * @param dev VHost device + * @param queue_id Queue identifier + * @param slot_id Slot identifier + * @param bufs Array of GPA/length pairs + * @param bufs_count Number of bufs in the array + * @param read_iovec Array to fill with read iovecs + * @param max_read_iovecs Maximum number of read iovecs that can be stored + * @param write_iovec Array to fill with write iovecs + * @param max_write_iovecs Maximum number of write iovecs that can be stored + * @param read_count Number of read iovecs prepared + * @param write_count Number of write iovecs prepared + * + * @retval 0 Success + * @retval -EINVAL Invalid parameters + * @retval -ENOMEM Insufficient memory + * @retval -E2BIG Buffer too large (in other word, iovecs are too small) + */ +static inline int vhost_prepare_iovec(const struct device *dev, uint16_t queue_id, uint16_t slot_id, + const struct vhost_buf *bufs, size_t bufs_count, + struct vhost_iovec *read_iovec, size_t max_read_iovecs, + struct vhost_iovec *write_iovec, size_t max_write_iovecs, + size_t *read_count, size_t *write_count) +{ + const struct vhost_controller_api *api = dev->api; + + return api->prepare_iovec(dev, queue_id, slot_id, bufs, bufs_count, read_iovec, + max_read_iovecs, write_iovec, max_write_iovecs, read_count, + write_count); +} + +/** + * @brief Release all iovecs + * + * Release iovecs that prepared by host_prepare_iovec. + * + * @param dev VHost controller device + * @param queue_id Queue ID + * @param slot_id Slot ID. 
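+ *
+ * @note Buffers prepared with vhost_prepare_iovec() should be released with
+ *       this call once the backend has finished accessing them; vringh users
+ *       normally get this for free, since vringh_complete() releases the
+ *       chain identified by its head index.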
+ * + * @retval 0 Success + * @retval -EINVAL Invalid parameters + */ +static inline int vhost_release_iovec(const struct device *dev, uint16_t queue_id, uint16_t slot_id) +{ + const struct vhost_controller_api *api = dev->api; + + return api->release_iovec(dev, queue_id, slot_id); +} + +/** + * @brief Get VirtQueue components + * + * @param dev VHost controller device + * @param queue_id Queue ID + * @param parts Array for descriptor, available, used ring pointers + * @param queue_size Queue size output + * + * @retval 0 Success + * @retval -EINVAL Invalid parameters + * @retval -ENODEV Queue not ready + */ +static inline int vhost_get_virtq(const struct device *dev, uint16_t queue_id, void **parts, + size_t *queue_size) +{ + const struct vhost_controller_api *api = dev->api; + + return api->get_virtq(dev, queue_id, parts, queue_size); +} + +/** + * @brief Get negotiated VirtIO feature bits + * + * @param dev VHost controller device + * @param drv_feats Output for feature mask + * + * @retval 0 Success + * @retval -EINVAL Invalid parameters + */ +static inline int vhost_get_driver_features(const struct device *dev, uint64_t *drv_feats) +{ + const struct vhost_controller_api *api = dev->api; + + return api->get_driver_features(dev, drv_feats); +} + +/** + * @brief Check if queue is ready for processing + * + * @param dev VHost controller device + * @param queue_id Queue ID (0-based) + * + * @retval true Queue is ready + * @retval false Queue not ready or invalid + */ +static inline bool vhost_queue_ready(const struct device *dev, uint16_t queue_id) +{ + const struct vhost_controller_api *api = dev->api; + + return api->virtq_is_ready(dev, queue_id); +} + +/** + * @brief Register device-wide queue ready callback + * + * This callback will unregister on device reset. + * + * @param dev VHost controller device + * @param callback Function to call when any queue becomes ready + * @param user_data User data for callback + * + * @retval 0 Success + * @retval -EINVAL Invalid parameters + */ +static inline int vhost_register_virtq_ready_cb(const struct device *dev, + void (*callback)(const struct device *dev, + uint16_t queue_id, void *data), + void *user_data) +{ + const struct vhost_controller_api *api = dev->api; + + return api->register_virtq_ready_cb(dev, callback, user_data); +} + +/** + * @brief Register per-queue guest notification callback + * + * This callback will unregister on queue reset. 
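+ *
+ * @note This is the hook that ring handlers build on: vringh_init_device(),
+ *       for instance, registers an internal callback here so that a guest
+ *       "kick" on the queue is forwarded to the vringh kick handler.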
+ * + * @param dev VHost controller device + * @param queue_id Queue ID (0-based) + * @param callback Function to call on guest notifications + * @param user_data User data for callback + * + * @retval 0 Success + * @retval -EINVAL Invalid parameters + * @retval -ENODEV Queue not found + */ +static inline int vhost_register_virtq_notify_cb(const struct device *dev, uint16_t queue_id, + void (*callback)(const struct device *dev, + uint16_t queue_id, void *data), + void *user_data) +{ + const struct vhost_controller_api *api = dev->api; + + return api->register_virtq_notify_cb(dev, queue_id, callback, user_data); +} + +/** + * @brief Send interrupt notification to guest + * + * @param dev VHost controller device + * @param queue_id Queue ID (0-based) + * + * @retval 0 Success + * @retval -EINVAL Invalid parameters + * @retval -ENODEV Queue not ready + * @retval -EIO Interrupt delivery failed + */ +static inline int vhost_notify_virtq(const struct device *dev, uint16_t queue_id) +{ + const struct vhost_controller_api *api = dev->api; + + return api->notify_virtq(dev, queue_id); +} + +/** + * @brief Set device status and notify guest + * + * @param dev VHost controller device + * @param status VirtIO device status bits to set + * + * @retval 0 Success + * @retval -EINVAL Invalid parameters + * @retval -EIO Notification failed + */ +static inline int vhost_set_device_status(const struct device *dev, uint32_t status) +{ + const struct vhost_controller_api *api = dev->api; + + return api->set_device_status(dev, status); +} + +/** + * @} + */ + +#endif /* ZEPHYR_DRIVERS_VHOST_H_ */ diff --git a/include/zephyr/drivers/vhost/vringh.h b/include/zephyr/drivers/vhost/vringh.h new file mode 100644 index 0000000000000..b20ce4e42be6e --- /dev/null +++ b/include/zephyr/drivers/vhost/vringh.h @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2025 TOKITA Hiroshi + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef ZEPHYR_DRIVERS_VHOST_VRINGH_H_ +#define ZEPHYR_DRIVERS_VHOST_VRINGH_H_ + +/** + * @file + * @brief VIRTIO Ring Handler API + * + * VIRTIO ring handler (vringh) provides host-side access to guest VIRTIO rings. + * Based on Linux kernel's vringh implementation. + * + * @defgroup vringh_apis VIRTIO Ring Handler APIs + * @ingroup vhost_apis + * @{ + */ + +#include +#include +#include + +struct virtq_desc; +struct virtq_avail; +struct virtq_used; + +/** + * @brief VirtQueue ring structure + * + * Contains pointers to VIRTIO ring components: descriptor table, + * available ring, and used ring. + */ +struct vring { + uint16_t num; /**< Number of descriptors in ring (power of 2) */ + struct virtq_desc *desc; /**< Descriptor table pointer (guest memory) */ + struct virtq_avail *avail; /**< Available ring pointer (guest memory) */ + struct virtq_used *used; /**< Used ring pointer (guest memory) */ +}; + +/** + * @brief VIRTIO ring host-side handler + * + * Host-side interface for processing guest VIRTIO rings. + * Based on Linux kernel vringh implementation with split virtqueue support. 
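+ *
+ * Updates to the ring indices and the used ring are serialized with the
+ * embedded spinlock by the vringh implementation.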
+ */ +struct vringh { + bool event_indices; /**< Guest supports VIRTIO_F_EVENT_IDX */ + bool weak_barriers; /**< Use weak memory barriers */ + uint16_t last_avail_idx; /**< Last available index processed */ + uint16_t last_used_idx; /**< Last used index written */ + uint32_t completed; /**< Descriptors completed since last notification */ + struct vring vring; /**< VirtQueue ring components */ + const struct device *dev; /**< Associated VHost backend device */ + size_t queue_id; /**< Queue ID within VHost device */ + struct k_spinlock lock; /**< Spinlock for exclusive execution */ + + /** + * @brief Virtqueue notification callback + * Called to signal the VirtIO driver about completed buffers. + */ + void (*notify)(struct vringh *vr); + + /** + * @brief Queue kick callback + * Called when the VirtIO driver notifies (kicks) the queue. + */ + void (*kick)(struct vringh *vr); +}; + +/** + * @brief VirtQueue I/O vector structure + * + * Manages iovec array for processing VirtQueue descriptor chains. + * Tracks current position and handles partial buffer consumption. + */ +struct vringh_iov { + struct vhost_iovec *iov; /**< Array of I/O vectors */ + size_t i; /**< Current iovec index */ + size_t consumed; /**< Bytes consumed from current iovec */ + unsigned int max_num; /**< Maximum number of iovecs */ + unsigned int used; /**< Number of iovecs currently used */ +}; + +/** + * @brief Initialize VirtQueue ring handler with raw pointers + * + * @param vh VirtQueue ring handler to initialize + * @param features VIRTIO feature bits + * @param num Number of descriptors (must be power of 2) + * @param weak_barriers Use weak memory barriers + * @param desc Descriptor table pointer + * @param avail Available ring pointer + * @param used Used ring pointer + * + * @retval 0 Success + * @retval -EINVAL Invalid parameters + * @retval -ENOMEM Insufficient memory + */ +int vringh_init(struct vringh *vh, uint64_t features, uint16_t num, bool weak_barriers, + struct virtq_desc *desc, struct virtq_avail *avail, struct virtq_used *used); + +/** + * @brief Initialize VirtQueue ring handler with VHost device + * + * @param vrh VirtQueue ring handler to initialize + * @param dev VHost backend device + * @param queue_id Queue ID to handle + * @param kick_callback Queue kick callback invoked when the VirtIO driver notifies the queue + * + * @retval 0 Success + * @retval -EINVAL Invalid parameters + * @retval -ENODEV Device or queue not found + * @retval -ENOMEM Insufficient memory + * @retval -EBUSY Queue already in use + * @retval -ENOTCONN Device not connected + * + * @code{.c} + * static void kick_handler(struct vringh *vrh) + * { + * uint16_t head; + * + * while (vringh_getdesc(vrh, &riov, &wiov, &head) > 0) { + * process_buffers(&riov, &wiov); + * vringh_complete(vrh, head, total_bytes_written(&wiov)); + * if (vringh_need_notify(vrh) > 0) { + * vringh_notify(vrh); + * } + * } + * } + * + * int ret = vringh_init_device(&vrh, vhost_dev, 0, kick_handler); + * @endcode + */ +int vringh_init_device(struct vringh *vrh, const struct device *dev, uint16_t queue_id, + void (*kick_callback)(struct vringh *vrh)); + +/** + * @brief Retrieve next available descriptor from VirtQueue + * + * Maps descriptor chain into host-accessible iovecs. + * Separates readable and writable buffers per VIRTIO specification. 
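+ *
+ * The caller owns the iovec storage. A minimal, illustrative setup (array
+ * sizes and the processing helpers are placeholders) looks like:
+ *
+ * @code{.c}
+ * struct vhost_iovec rvecs[8], wvecs[8];
+ * struct vringh_iov riov, wiov;
+ * uint16_t head;
+ *
+ * vringh_iov_init(&riov, rvecs, ARRAY_SIZE(rvecs));
+ * vringh_iov_init(&wiov, wvecs, ARRAY_SIZE(wvecs));
+ *
+ * if (vringh_getdesc(&vrh, &riov, &wiov, &head) > 0) {
+ *         process_buffers(&riov, &wiov);
+ *         vringh_complete(&vrh, head, total_bytes_written(&wiov));
+ * }
+ * @endcode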
+ * + * @param vrh VirtQueue ring handler + * @param riov IOV for readable buffers + * @param wiov IOV for writable buffers + * @param head_out Descriptor head index for completion + * + * @retval 1 Success - descriptor retrieved + * @retval 0 No descriptors available + * @retval -errno Invalid parameters + */ +int vringh_getdesc(struct vringh *vrh, struct vringh_iov *riov, struct vringh_iov *wiov, + uint16_t *head_out); + +/** + * @brief Complete processing of VirtQueue descriptor + * + * Marks descriptor as completed and adds entry to used ring. + * Based on Linux vringh complete operation. + * + * @param vrh VirtQueue ring handler + * @param head Descriptor head index from vringh_getdesc() + * @param len Total bytes written to writable buffers + * + * @retval 0 Success + * @retval -EINVAL Invalid parameters + * @retval -EFAULT Cannot access used ring + * @retval -ENOSPC Used ring full + * @warning Do not call multiple times for the same descriptor + * + * @code{.c} + * // After processing a descriptor chain + * uint32_t bytes_written = generate_response(&wiov); + * int ret = vringh_complete(&vrh, head, bytes_written); + * if (ret == 0) { + * // Check if guest notification needed + * if (vringh_need_notify(&vrh) > 0) { + * vringh_notify(&vrh); + * } + * } + * @endcode + */ +int vringh_complete(struct vringh *vrh, uint16_t head, uint32_t len); + +/** + * @brief Abandon processing of descriptors without completion + * + * Returns descriptors to available state for re-processing. + * Based on Linux vringh abandon operation. + * + * @param vrh VirtQueue ring handler + * @param num Number of descriptors to abandon + * + * @retval 0 Success + * @retval -EINVAL Invalid parameters + * @retval -ERANGE Cannot abandon more than retrieved + * @retval -EFAULT Error accessing ring + */ +int vringh_abandon(struct vringh *vrh, uint32_t num); + +__maybe_unused static inline void vringh_iov_init(struct vringh_iov *iov, struct vhost_iovec *kvec, + unsigned int num) +{ + iov->used = iov->i = 0; + iov->consumed = 0; + iov->max_num = num; + iov->iov = kvec; +} + +/** + * @brief Reset IOV structure for reuse + * + * @param iov IOV structure to reset + */ +void vringh_iov_reset(struct vringh_iov *iov); + +/** + * @brief Check if guest notification is required + * + * Determines whether guest should be notified based on VIRTIO + * notification suppression mechanism. + * Based on Linux vringh need_notify implementation. + * + * @param vrh VirtQueue ring handler + * + * @retval 1 Notification required + * @retval 0 Notification suppressed + * @retval -EINVAL Invalid parameters + * @retval -EFAULT Cannot access guest memory + */ +int vringh_need_notify(struct vringh *vrh); + +/** + * @brief Send notification to guest about completed buffers + * + * Invokes registered notification callback to inform guest. + * Based on Linux vringh notify implementation. 
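+ *
+ * @note Typically called after vringh_complete(), when vringh_need_notify()
+ *       returns 1, as shown in the examples above.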
+ * + * @param vrh VirtQueue ring handler + */ +void vringh_notify(struct vringh *vrh); + +/** + * @} + */ + +#endif /* ZEPHYR_DRIVERS_VHOST_VRINGH_H_ */ From edec3d4b1de105c463cb50d090600c6061a34f8e Mon Sep 17 00:00:00 2001 From: TOKITA Hiroshi Date: Fri, 13 Jun 2025 23:17:20 +0900 Subject: [PATCH 12/14] drivers: vhost: add Xen MMIO VirtIO backend Implements VirtIO backend over Xen MMIO interface Signed-off-by: TOKITA Hiroshi --- drivers/vhost/CMakeLists.txt | 1 + drivers/vhost/Kconfig | 9 + drivers/vhost/vhost_xen_mmio.c | 1451 +++++++++++++++++++++++ dts/bindings/vhost/xen,virtio-mmio.yaml | 39 + 4 files changed, 1500 insertions(+) create mode 100644 drivers/vhost/vhost_xen_mmio.c create mode 100644 dts/bindings/vhost/xen,virtio-mmio.yaml diff --git a/drivers/vhost/CMakeLists.txt b/drivers/vhost/CMakeLists.txt index 43e28601f9b43..72e1a581e2025 100644 --- a/drivers/vhost/CMakeLists.txt +++ b/drivers/vhost/CMakeLists.txt @@ -4,3 +4,4 @@ zephyr_library() zephyr_library_sources_ifdef(CONFIG_VHOST vringh.c) +zephyr_library_sources_ifdef(CONFIG_VHOST_XEN_MMIO vhost_xen_mmio.c) diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig index 11b5c4116e930..3c0169592bdb6 100644 --- a/drivers/vhost/Kconfig +++ b/drivers/vhost/Kconfig @@ -8,6 +8,15 @@ config VHOST if VHOST +config VHOST_XEN_MMIO + bool "support for MMIO-based VIRTIO backend on Xen hypervisor" + default y + depends on DT_HAS_XEN_VHOST_MMIO_ENABLED + select XEN_STORE_CLI + select XEN_DMOP + help + Enable VIRTIO-MMIO backend on Xen hypervisor + endif # VIRTIO module = VHOST diff --git a/drivers/vhost/vhost_xen_mmio.c b/drivers/vhost/vhost_xen_mmio.c new file mode 100644 index 0000000000000..d0fbe5075b2eb --- /dev/null +++ b/drivers/vhost/vhost_xen_mmio.c @@ -0,0 +1,1451 @@ +/* + * Copyright (c) 2025 TOKITA Hiroshi + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +LOG_MODULE_REGISTER(xen_vhost_mmio); + +#define DT_DRV_COMPAT xen_vhost_mmio + +#define PAGE_SHIFT (__builtin_ctz(CONFIG_MMU_PAGE_SIZE)) +#define XEN_GRANT_ADDR_OFF (1ULL << 63) + +#define VIRTIO_MMIO_MAGIC 0x74726976 +#define VIRTIO_MMIO_SUPPORTED_VERSION 2 + +/* The maximum retry period is 12.8 seconds */ +#define RETRY_DELAY_BASE_MS 50 +#define RETRY_BACKOFF_EXP_MAX 8 + +#define HEX_64BIT_DIGITS 16 + +#define LOG_LVL_Q(lvl, str, ...) \ + UTIL_CAT(LOG_, lvl)("%s[%u]: " str, __func__, queue_id, ##__VA_ARGS__) +#define LOG_ERR_Q(str, ...) LOG_LVL_Q(ERR, str, ##__VA_ARGS__) +#define LOG_WRN_Q(str, ...) LOG_LVL_Q(WRN, str, ##__VA_ARGS__) +#define LOG_INF_Q(str, ...) LOG_LVL_Q(INF, str, ##__VA_ARGS__) +#define LOG_DBG_Q(str, ...) 
LOG_LVL_Q(DBG, str, ##__VA_ARGS__) + +#define META_PAGES_INDEX(cfg) (cfg->queue_size_max) + +enum virtq_parts { + VIRTQ_DESC = 0, + VIRTQ_AVAIL, + VIRTQ_USED, + NUM_OF_VIRTQ_PARTS, +}; + +struct vhost_xen_mmio_config { + k_thread_stack_t *workq_stack; + size_t workq_stack_size; + int workq_priority; + + uint16_t num_queues; + uint16_t queue_size_max; + uint8_t device_id; + uint32_t vendor_id; + uintptr_t base; + size_t reg_size; + + uint64_t device_features; +}; + +struct mapped_pages { + uint64_t gpa; + uint8_t *buf; + size_t len; + struct gnttab_unmap_grant_ref *ops; + size_t map_count; + size_t pages; +}; + +struct mapped_pages_chunk { + size_t count; + struct mapped_pages *map; + bool releasing; +}; + +struct virtq_callback { + void (*cb)(const struct device *dev, uint16_t queue_id, void *user_data); + void *data; +}; + +struct virtq_context { + /** + * Store pages mapped to descriptors. + * Initializer allocate (queue_size_max + 1) statically, + * The last one is used to hold the desc, avail, and used + * of virtq itself. + */ + struct mapped_pages_chunk *pages_chunks; + struct virtq_callback queue_notify_cb; + atomic_t queue_size; + atomic_t queue_ready_notified; + uint64_t virtq_parts_gpa[NUM_OF_VIRTQ_PARTS]; + struct k_spinlock lock; +}; + +struct vhost_xen_mmio_data { + struct k_work_delayable init_work; + struct k_work_delayable isr_work; + struct k_work_delayable ready_work; + struct k_work_q workq; + const struct device *dev; + atomic_t initialized; + atomic_t retry; + + struct xs_watcher watcher; + evtchn_port_t xs_port; + evtchn_port_t ioserv_port; + struct shared_iopage *shared_iopage; + uint32_t vcpus; + + struct { + ioservid_t servid; + domid_t domid; + uint32_t deviceid; + uint32_t irq; + uintptr_t base; + } fe; + + struct { + uint64_t driver_features; + uint8_t device_features_sel; + uint8_t driver_features_sel; + atomic_t irq_status; + atomic_t status; + atomic_t queue_sel; + } be; + + atomic_t notify_queue_id; /**< Temporary variable to pass to workq */ + struct virtq_callback queue_ready_cb; + struct virtq_context *vq_ctx; +}; + +struct query_param { + const char *key; + const char *expected; +}; + +/** + * Get the nth string from a null-separated string buffer + */ +static const char *nth_str(const char *buf, size_t len, size_t n) +{ + int cnt = 0; + + if (n == 0) { + return buf; + } + + for (size_t i = 0; i < len; i++) { + if (buf[i] == '\0') { + cnt++; + } + + if (cnt == n && (i != (len - 1))) { + return &buf[i + 1]; + } + } + + return NULL; +} + +/** + * Query VIRTIO frontend's domid/deviceid from XenStore + */ +static int query_virtio_backend(const struct query_param *params, size_t param_num, domid_t *domid, + int *deviceid) +{ + char buf[65] = {0}; + const size_t len = ARRAY_SIZE(buf) - 1; + const char *ptr_i, *ptr_j; + int i, j; + + const ssize_t len_i = xs_directory("backend/virtio", buf, len, 0); + + if (len_i < 0) { + return -EIO; + } + if (len_i == 6 && strncmp(buf, "ENOENT", len) == 0) { + return -ENOENT; + } + + for (i = 0, ptr_i = buf; ptr_i; ptr_i = nth_str(buf, len_i, i++)) { + char *endptr; + + *domid = strtol(ptr_i, &endptr, 10); + if (*endptr != '\0') { + continue; + } + + snprintf(buf, len, "backend/virtio/%d", *domid); + + const ssize_t len_j = xs_directory(buf, buf, ARRAY_SIZE(buf), 0); + + if (len_j < 0 || strncmp(buf, "ENOENT", ARRAY_SIZE(buf)) == 0) { + continue; + } + + for (j = 0, ptr_j = buf; ptr_j; ptr_j = nth_str(buf, len_j, j++)) { + *deviceid = strtol(ptr_j, &endptr, 10); + if (*endptr != '\0') { + continue; /* Skip invalid device ID 
*/ + } + + bool match = true; + + for (size_t k = 0; k < param_num; k++) { + snprintf(buf, len, "backend/virtio/%d/%d/%s", *domid, *deviceid, + params[k].key); + const ssize_t len_k = xs_read(buf, buf, ARRAY_SIZE(buf), 0); + + if ((len_k < 0) || (strncmp(buf, "ENOENT", ARRAY_SIZE(buf)) == 0) || + (strncmp(params[k].expected, buf, ARRAY_SIZE(buf)) != 0)) { + match = false; + break; + } + } + + if (match) { + return 0; + } + } + } + + return -ENOENT; +} + +static uintptr_t query_irq(domid_t domid, int deviceid) +{ + char buf[65] = {0}; + size_t len = ARRAY_SIZE(buf) - 1; + char *endptr; + + snprintf(buf, len, "backend/virtio/%d/%d/irq", domid, deviceid); + + len = xs_read(buf, buf, ARRAY_SIZE(buf) - 1, 0); + if ((len < 0) || (strncmp(buf, "ENOENT", ARRAY_SIZE(buf)) == 0)) { + return (uintptr_t)-1; + } + + uintptr_t irq_val = strtol(buf, &endptr, 10); + + if (*endptr != '\0') { + return (uintptr_t)-1; + } + + return irq_val; +} + +static int unmap_pages(struct mapped_pages *pages) +{ + int ret = 0; + + if (!pages || !pages->ops) { + return 0; + } + + LOG_DBG("%s: pages=%p unmap=%p count=%zu pages=%zu", __func__, pages, pages->ops, + pages->map_count, pages->pages); + + for (size_t i = 0; i < pages->map_count; i++) { + LOG_DBG("pages: i=%zu status=%d", i, pages->ops[i].status); + if (pages->ops[i].status == GNTST_okay) { + int rc = gnttab_unmap_refs(&pages->ops[i], 1); + + if (rc < 0) { + LOG_ERR("gnttab_unmap_refs failed: %d", rc); + ret = rc; + } + pages->ops[i].status = GNTST_general_error; + } + } + + pages->map_count = 0; + + return ret; +} + +static int free_pages_array(struct mapped_pages *pages, size_t len) +{ + int ret = 0; + + if (!pages) { + return 0; + } + + for (size_t i = 0; i < len; i++) { + int rc = unmap_pages(&pages[i]); + + if (rc < 0) { + LOG_ERR("%s: [%zu] unmap failed: %d", __func__, i, rc); + ret = rc; + } + + if (pages[i].ops) { + k_free(pages[i].ops); + } + + if (pages[i].buf) { + rc = gnttab_put_pages(pages[i].buf, pages[i].pages); + if (rc < 0) { + LOG_ERR("%s: [%zu] gnttab_put_pages failed: %d", __func__, i, rc); + ret = rc; + } + } + } + + return ret; +} + +static inline k_spinlock_key_t wait_for_chunk_ready(struct virtq_context *ctx, + struct mapped_pages_chunk *chunk, + k_spinlock_key_t key) +{ + while (chunk->releasing) { + k_spin_unlock(&ctx->lock, key); + key = k_spin_lock(&ctx->lock); + } + + return key; +} + +static void reset_queue(const struct device *dev, uint16_t queue_id) +{ + const struct vhost_xen_mmio_config *config = dev->config; + struct vhost_xen_mmio_data *data = dev->data; + struct virtq_context *vq_ctx = &data->vq_ctx[queue_id]; + + k_spinlock_key_t key = k_spin_lock(&vq_ctx->lock); + + for (size_t i = 0; i <= config->queue_size_max; i++) { + struct mapped_pages_chunk *chunk = &vq_ctx->pages_chunks[i]; + + key = wait_for_chunk_ready(vq_ctx, chunk, key); + + if (chunk->map && chunk->count > 0) { + const size_t count = chunk->count; + + chunk->releasing = true; + chunk->map = NULL; + chunk->count = 0; + k_spin_unlock(&vq_ctx->lock, key); + + free_pages_array(chunk->map, count); + k_free(chunk->map); + + key = k_spin_lock(&vq_ctx->lock); + chunk->releasing = false; + } else { + chunk->count = 0; + } + } + + vq_ctx->queue_notify_cb.cb = NULL; + vq_ctx->queue_notify_cb.data = NULL; + + k_spin_unlock(&vq_ctx->lock, key); + + atomic_set(&vq_ctx->queue_size, 0); + atomic_set(&vq_ctx->queue_ready_notified, 0); +} + +static void setup_unmap_info(struct mapped_pages *pages, const struct vhost_buf *bufs, + size_t bufs_len, const struct gnttab_map_grant_ref 
*map_ops) +{ + size_t map_idx = 0; + + for (size_t i = 0; i < bufs_len; i++) { + const size_t num_pages = (bufs[i].len + XEN_PAGE_SIZE - 1) / XEN_PAGE_SIZE; + struct mapped_pages *page_info = &pages[i]; + + for (size_t j = 0; j < num_pages; j++) { + const struct gnttab_map_grant_ref *map = &map_ops[map_idx]; + struct gnttab_unmap_grant_ref *unmap = &page_info->ops[j]; + + unmap->host_addr = map->host_addr; + unmap->dev_bus_addr = map->dev_bus_addr; + unmap->handle = map->handle; + unmap->status = map->status; + map_idx++; + } + + page_info->map_count = num_pages; + LOG_DBG("%s: range[%zu] map_count=%zu num_pages=%zu " + "pages=%zu", + __func__, i, page_info->map_count, num_pages, page_info->pages); + } +} + +static int setup_iovec_mappings(struct mapped_pages *pages, domid_t domid, + const struct vhost_buf *bufs, size_t bufs_len) +{ + size_t total_map_ops = 0; + size_t map_idx = 0; + int ret = 0; + + for (size_t i = 0; i < bufs_len; i++) { + total_map_ops += (bufs[i].len + XEN_PAGE_SIZE - 1) / XEN_PAGE_SIZE; + } + + struct gnttab_map_grant_ref *map_ops = + k_malloc(sizeof(struct gnttab_map_grant_ref) * total_map_ops); + + if (!map_ops) { + LOG_ERR("k_malloc failed for %zu map operations", total_map_ops); + return -ENOMEM; + } + + for (size_t i = 0; i < bufs_len; i++) { + const size_t num_pages = (bufs[i].len + XEN_PAGE_SIZE - 1) / XEN_PAGE_SIZE; + struct mapped_pages *page_info = &pages[i]; + + for (size_t j = 0; j < num_pages; j++) { + const uint64_t page_gpa = bufs[i].gpa + (j * XEN_PAGE_SIZE); + + if (!(page_gpa & XEN_GRANT_ADDR_OFF)) { + LOG_ERR("addr missing grant marker: 0x%" PRIx64, page_gpa); + ret = -EINVAL; + goto free_ops; + } + + map_ops[map_idx].host_addr = + (uintptr_t)page_info->buf + (j * XEN_PAGE_SIZE); + map_ops[map_idx].flags = GNTMAP_host_map; + map_ops[map_idx].ref = (page_gpa & ~XEN_GRANT_ADDR_OFF) >> XEN_PAGE_SHIFT; + map_ops[map_idx].dom = domid; + + map_idx++; + } + } + + ret = gnttab_map_refs(map_ops, total_map_ops); + if (ret < 0) { + LOG_ERR("gnttab_map_refs failed: %d", ret); + goto free_ops; + } + + /* Check mapping results */ + map_idx = 0; + for (size_t i = 0; i < bufs_len; i++) { + const size_t num_pages = (bufs[i].len + XEN_PAGE_SIZE - 1) / XEN_PAGE_SIZE; + + for (size_t j = 0; j < num_pages; j++) { + const struct gnttab_map_grant_ref *op = &map_ops[map_idx]; + + if (op->status != GNTST_okay) { + LOG_ERR("Mapping failed for range %zu page %zu: status=%d", i, j, + op->status); + ret = -EIO; + goto unmap; + } + map_idx++; + } + } + + ret = 0; + +unmap: + setup_unmap_info(pages, bufs, bufs_len, map_ops); + + if (ret < 0) { + unmap_pages(pages); + } + +free_ops: + if (map_ops) { + k_free(map_ops); + } + + return ret; +} + +static int init_pages_chunks(const struct device *dev, uint16_t queue_id, uint16_t head, + const struct vhost_buf *bufs, size_t bufs_len, + struct vhost_iovec *r_iovecs, size_t r_iovecs_len, + struct vhost_iovec *w_iovecs, size_t w_iovecs_len, size_t *r_count_out, + size_t *w_count_out, size_t total_pages) +{ + struct vhost_xen_mmio_data *data = dev->data; + struct virtq_context *vq_ctx = &data->vq_ctx[queue_id]; + struct mapped_pages_chunk *chunk = &vq_ctx->pages_chunks[head]; + size_t r_iovecs_count = 0, w_iovecs_count = 0; + int ret; + + LOG_DBG_Q("%zu bufs, %zu total pages", bufs_len, total_pages); + + /* Reallocate page chunk structure */ + if (!chunk->map || chunk->count < bufs_len) { + if (chunk->map) { + free_pages_array(chunk->map, chunk->count); + k_free(chunk->map); + } + chunk->map = k_malloc(sizeof(struct mapped_pages) * 
bufs_len); + if (!chunk->map) { + return -ENOMEM; + } + memset(chunk->map, 0, sizeof(struct mapped_pages) * bufs_len); + chunk->count = bufs_len; + LOG_DBG_Q("Allocated chunk at %p, count=%zu", chunk->map, chunk->count); + } + + for (size_t i = 0; i < bufs_len; i++) { + const size_t num_pages = (bufs[i].len + XEN_PAGE_SIZE - 1) / XEN_PAGE_SIZE; + struct mapped_pages *page_info = &chunk->map[i]; + + /* Allocate or reuse buffer for this range */ + if (!page_info->buf || page_info->pages < num_pages) { + free_pages_array(page_info, 1); + memset(page_info, 0, sizeof(struct mapped_pages)); + + barrier_dmem_fence_full(); + + page_info->len = num_pages * XEN_PAGE_SIZE; + page_info->gpa = bufs[i].gpa; + page_info->buf = gnttab_get_pages(num_pages); + page_info->ops = + k_malloc(sizeof(struct gnttab_unmap_grant_ref) * num_pages); + page_info->pages = num_pages; + page_info->map_count = 0; + + LOG_DBG_Q("range[%zu] allocated pages=%zu num_pages=%zu buf=%p unmap=%p", i, + page_info->pages, num_pages, page_info->buf, page_info->ops); + + if (!page_info->buf || !page_info->ops) { + LOG_ERR_Q("Failed to allocate for range[%zu]: buf=%p unmap=%p", i, + page_info->buf, page_info->ops); + return -ENOMEM; + } + + for (size_t j = 0; j < num_pages; j++) { + page_info->ops[j].status = GNTST_general_error; + } + } + } + + ret = setup_iovec_mappings(chunk->map, data->fe.domid, bufs, bufs_len); + if (ret < 0) { + return ret; + } + + k_spinlock_key_t key = k_spin_lock(&vq_ctx->lock); + + key = wait_for_chunk_ready(vq_ctx, chunk, key); + + for (size_t i = 0; i < bufs_len; i++) { + const bool is_write = bufs[i].is_write; + const size_t iovecs_len = is_write ? w_iovecs_len : r_iovecs_len; + const size_t current_count = is_write ? w_iovecs_count : r_iovecs_count; + struct vhost_iovec *iovec = + is_write ? &w_iovecs[w_iovecs_count] : &r_iovecs[r_iovecs_count]; + + if (current_count >= iovecs_len) { + LOG_ERR_Q("no more %s iovecs: %zu >= %zu", is_write ? "write" : "read", + current_count, iovecs_len); + k_spin_unlock(&vq_ctx->lock, key); + ret = -E2BIG; + break; + } + + const size_t page_offset = bufs[i].gpa & (XEN_PAGE_SIZE - 1); + const void *base_buf = chunk->map[i].buf; + const void *va = (void *)(((uintptr_t)base_buf) + page_offset); + + iovec->iov_base = (void *)va; + iovec->iov_len = bufs[i].len; + + if (is_write) { + w_iovecs_count++; + } else { + r_iovecs_count++; + } + } + + if (ret != -E2BIG) { + k_spin_unlock(&vq_ctx->lock, key); + } + + if (ret < 0) { + LOG_ERR_Q("chunk[%u] initialization failed: %d", head, ret); + } else { + *r_count_out = r_iovecs_count; + *w_count_out = w_iovecs_count; + LOG_DBG_Q("chunk[%u] init succeed bufs=%zu", head, bufs_len); + } + + return ret; +} + +/** + * @brief Set up a VirtIO queue for operation + * + * Maps the required grant pages for VirtIO ring structures (descriptor, + * available, and used rings) based on the current queue size. Uses + * metachain (pages[queue_size_max]) to store the meta pages. 
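+ *
+ * For a split virtqueue with N descriptors the VIRTIO 1.x layout gives
+ * 16 * N bytes for the descriptor table, 6 + 2 * N bytes for the available
+ * ring and 6 + 8 * N bytes for the used ring; each part is rounded up to a
+ * whole number of Xen pages before the grants are mapped.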
+ * + * @param dev VHost device instance + * @param queue_id ID of the queue to set up + * @return 0 on success, negative error code on failure + */ +static int setup_queue(const struct device *dev, uint16_t queue_id) +{ + const struct vhost_xen_mmio_config *config = dev->config; + struct vhost_xen_mmio_data *data = dev->data; + struct virtq_context *vq_ctx = &data->vq_ctx[queue_id]; + const size_t queue_size = atomic_get(&data->vq_ctx[queue_id].queue_size); + const size_t num_pages[] = {DIV_ROUND_UP(16 * queue_size, XEN_PAGE_SIZE), + DIV_ROUND_UP(2 * queue_size + 6, XEN_PAGE_SIZE), + DIV_ROUND_UP(8 * queue_size + 6, XEN_PAGE_SIZE)}; + int ret = 0; + + struct vhost_buf meta_bufs[NUM_OF_VIRTQ_PARTS]; + struct vhost_iovec dummy_iovecs[NUM_OF_VIRTQ_PARTS]; + size_t dummy_read_count, dummy_write_count; + size_t total_pages = 0; + + for (size_t i = 0; i < NUM_OF_VIRTQ_PARTS; i++) { + meta_bufs[i].gpa = vq_ctx->virtq_parts_gpa[i]; + meta_bufs[i].len = num_pages[i] * XEN_PAGE_SIZE; + meta_bufs[i].is_write = true; + total_pages += num_pages[i]; + + LOG_DBG_Q("Meta range[%zu]: gpa=0x%" PRIx64 " len=%zu pages=%zu", i, + meta_bufs[i].gpa, meta_bufs[i].len, num_pages[i]); + } + + ret = init_pages_chunks(dev, queue_id, META_PAGES_INDEX(config), meta_bufs, + NUM_OF_VIRTQ_PARTS, dummy_iovecs, + 0, /* No read iovecs needed for meta pages */ + dummy_iovecs, NUM_OF_VIRTQ_PARTS, /* Write iovecs for meta pages */ + &dummy_read_count, &dummy_write_count, total_pages); + + if (ret < 0) { + LOG_ERR_Q("init_pages_chunks failed: %d", ret); + return ret; + } + + k_spinlock_key_t key = k_spin_lock(&vq_ctx->lock); + + for (size_t i = 0; i < config->queue_size_max; i++) { + struct mapped_pages_chunk *chunk = &vq_ctx->pages_chunks[i]; + + key = wait_for_chunk_ready(vq_ctx, chunk, key); + + chunk->map = NULL; + chunk->count = 0; + } + + k_spin_unlock(&vq_ctx->lock, key); + + return 0; +} + +static void reset_device(const struct device *dev) +{ + const struct vhost_xen_mmio_config *config = dev->config; + struct vhost_xen_mmio_data *data = dev->data; + + data->be.driver_features = 0; + data->be.device_features_sel = 0; + data->be.driver_features_sel = 0; + atomic_set(&data->be.irq_status, 0); + atomic_set(&data->be.status, 0); + atomic_set(&data->be.queue_sel, 0); + + for (size_t i = 0; i < config->num_queues; i++) { + reset_queue(dev, i); + } +} + +static void ioreq_server_read_req(const struct device *dev, struct ioreq *r) +{ + const struct vhost_xen_mmio_config *config = dev->config; + struct vhost_xen_mmio_data *data = dev->data; + const size_t addr_offset = r->addr - data->fe.base; + + LOG_DBG("count %u: size: %u vp_eport: %u state: %d df: %d type: %d", r->count, r->size, + r->vp_eport, r->state, r->df, r->type); + + switch (addr_offset) { + case VIRTIO_MMIO_MAGIC_VALUE: { + r->data = VIRTIO_MMIO_MAGIC; + } break; + case VIRTIO_MMIO_VERSION: { + r->data = VIRTIO_MMIO_SUPPORTED_VERSION; + } break; + case VIRTIO_MMIO_DEVICE_ID: { + r->data = config->device_id; + } break; + case VIRTIO_MMIO_VENDOR_ID: { + r->data = config->vendor_id; + } break; + case VIRTIO_MMIO_DEVICE_FEATURES: { + if (data->be.device_features_sel == 0) { + r->data = (config->device_features & UINT32_MAX); + } else if (data->be.device_features_sel == 1) { + r->data = (config->device_features >> 32); + } else { + r->data = 0; + } + } break; + case VIRTIO_MMIO_DRIVER_FEATURES: { + if (data->be.driver_features_sel == 0) { + r->data = (data->be.driver_features & UINT32_MAX); + } else if (data->be.driver_features_sel == 1) { + r->data = 
(data->be.driver_features >> 32); + } else { + r->data = 0; + } + } break; + case VIRTIO_MMIO_QUEUE_SIZE_MAX: { + r->data = config->queue_size_max; + } break; + case VIRTIO_MMIO_STATUS: { + r->data = atomic_get(&data->be.status); + } break; + case VIRTIO_MMIO_INTERRUPT_STATUS: { + r->data = atomic_clear(&data->be.irq_status); + } break; + case VIRTIO_MMIO_QUEUE_READY: { + r->data = vhost_queue_ready(dev, atomic_get(&data->be.queue_sel)); + } break; + default: { + r->data = -1; + } break; + } + + LOG_DBG("r/%zx %" PRIx64, addr_offset, r->data); +} + +static void ioreq_server_write_req(const struct device *dev, struct ioreq *r) +{ + const struct vhost_xen_mmio_config *config = dev->config; + struct vhost_xen_mmio_data *data = dev->data; + const size_t addr_offset = r->addr - data->fe.base; + + LOG_DBG("w/%zx %" PRIx64, addr_offset, r->data); + + switch (addr_offset) { + case VIRTIO_MMIO_DEVICE_FEATURES_SEL: { + if (r->data == 0 || r->data == 1) { + data->be.device_features_sel = (uint8_t)r->data; + } + } break; + case VIRTIO_MMIO_DRIVER_FEATURES_SEL: { + if (r->data == 0 || r->data == 1) { + data->be.driver_features_sel = (uint8_t)r->data; + } + } break; + case VIRTIO_MMIO_DRIVER_FEATURES: { + if (data->be.driver_features_sel == 0) { + uint64_t *drvfeats = &data->be.driver_features; + + *drvfeats = (r->data | (*drvfeats & 0xFFFFFFFF00000000)); + } else { + uint64_t *drvfeats = &data->be.driver_features; + + *drvfeats = ((r->data << 32) | (*drvfeats & UINT32_MAX)); + } + } break; + case VIRTIO_MMIO_INTERRUPT_ACK: { + if (r->data) { + atomic_and(&data->be.irq_status, ~r->data); + } + } break; + case VIRTIO_MMIO_STATUS: { + if (r->data & BIT(DEVICE_STATUS_FEATURES_OK)) { + const bool ok = !(data->be.driver_features & ~config->device_features); + + if (ok) { + atomic_or(&data->be.status, BIT(DEVICE_STATUS_FEATURES_OK)); + } else { + LOG_ERR("%" PRIx64 " d driver_feats=%" PRIx64 + " device_feats=%" PRIx64, + r->data, data->be.driver_features, config->device_features); + atomic_or(&data->be.status, BIT(DEVICE_STATUS_FAILED)); + atomic_and(&data->be.status, ~BIT(DEVICE_STATUS_FEATURES_OK)); + } + } else if (r->data == 0) { + reset_device(dev); + } else { + atomic_or(&data->be.status, r->data); + } + } break; + case VIRTIO_MMIO_QUEUE_DESC_LOW: + case VIRTIO_MMIO_QUEUE_DESC_HIGH: + case VIRTIO_MMIO_QUEUE_AVAIL_LOW: + case VIRTIO_MMIO_QUEUE_AVAIL_HIGH: + case VIRTIO_MMIO_QUEUE_USED_LOW: + case VIRTIO_MMIO_QUEUE_USED_HIGH: { + const uint16_t queue_id = atomic_get(&data->be.queue_sel); + + if (queue_id < config->num_queues) { + const size_t part = (addr_offset - VIRTIO_MMIO_QUEUE_DESC_LOW) / 0x10; + const bool hi = !!((addr_offset - VIRTIO_MMIO_QUEUE_DESC_LOW) % 0x10); + uint64_t *p_gpa = &data->vq_ctx[queue_id].virtq_parts_gpa[part]; + + *p_gpa = hi ? 
((r->data << 32) | (*p_gpa & UINT32_MAX)) + : (r->data | (*p_gpa & 0xFFFFFFFF00000000)); + } + } break; + case VIRTIO_MMIO_QUEUE_NOTIFY: { + if (r->data < config->num_queues) { + atomic_set(&data->notify_queue_id, r->data); + k_work_schedule_for_queue(&data->workq, &data->isr_work, K_NO_WAIT); + } + } break; + case VIRTIO_MMIO_QUEUE_SIZE: { + const uint16_t queue_sel = atomic_get(&data->be.queue_sel); + + if (queue_sel < config->num_queues) { + const bool is_pow2 = (POPCOUNT((int)r->data) == 1); + + if (r->data > 0 && r->data <= config->queue_size_max && is_pow2) { + atomic_set(&data->vq_ctx[queue_sel].queue_size, r->data); + } else { + LOG_ERR("queue_size should be 2^n and <%u: size=%" PRIu64, + config->queue_size_max, r->data); + atomic_or(&data->be.status, BIT(DEVICE_STATUS_FAILED)); + } + } + } break; + case VIRTIO_MMIO_QUEUE_READY: { + const uint16_t queue_sel = atomic_get(&data->be.queue_sel); + const uint16_t queue_id = queue_sel; + + if (r->data == 0) { + reset_queue(dev, queue_id); + } else if (r->data && (queue_sel < config->num_queues)) { + int err = setup_queue(dev, queue_id); + + if (err < 0) { + atomic_or(&data->be.status, BIT(DEVICE_STATUS_FAILED)); + LOG_ERR("queue%u setup failed: %d", queue_sel, err); + } else if (data->queue_ready_cb.cb) { + data->queue_ready_cb.cb(dev, queue_id, data->queue_ready_cb.data); + } + } + } break; + case VIRTIO_MMIO_QUEUE_SEL: { + atomic_set(&data->be.queue_sel, r->data); + } break; + default: + break; + } +} + +static void ioreq_server_cb(void *ptr) +{ + const struct device *dev = ptr; + struct vhost_xen_mmio_data *data = dev->data; + struct ioreq *r = &data->shared_iopage->vcpu_ioreq[0]; + + if (r->state == STATE_IOREQ_READY) { + if (r->dir == IOREQ_WRITE) { + ioreq_server_write_req(dev, r); + } else { + ioreq_server_read_req(dev, r); + } + + r->state = STATE_IORESP_READY; + + barrier_dmem_fence_full(); + notify_evtchn(data->ioserv_port); + } +} + +static void bind_interdomain_nop(void *priv) +{ +} + +static void xs_notify_handler(const char *path, const char *token, void *param) +{ + const struct device *dev = param; + struct vhost_xen_mmio_data *data = dev->data; + + if (!atomic_get(&data->initialized) && !k_work_delayable_is_pending(&data->init_work)) { + k_work_schedule_for_queue(&data->workq, &data->init_work, K_NO_WAIT); + } +} + +static void isr_workhandler(struct k_work *work) +{ + const struct k_work_delayable *delayable = k_work_delayable_from_work(work); + struct vhost_xen_mmio_data *data = + CONTAINER_OF(delayable, struct vhost_xen_mmio_data, isr_work); + const struct device *dev = data->dev; + const struct vhost_xen_mmio_config *config = dev->config; + + const uint16_t queue_id = atomic_get(&data->notify_queue_id); + const struct virtq_context *vq_ctx = + (queue_id < config->num_queues) ? 
&data->vq_ctx[queue_id] : NULL; + + if (vq_ctx && vq_ctx->queue_notify_cb.cb) { + vq_ctx->queue_notify_cb.cb(dev, queue_id, vq_ctx->queue_notify_cb.data); + } +} + +static void ready_workhandler(struct k_work *work) +{ + const struct k_work_delayable *delayable = k_work_delayable_from_work(work); + struct vhost_xen_mmio_data *data = + CONTAINER_OF(delayable, struct vhost_xen_mmio_data, ready_work); + const struct device *dev = data->dev; + const struct vhost_xen_mmio_config *config = dev->config; + + for (size_t i = 0; i < config->num_queues; i++) { + bool queue_ready_notified = atomic_get(&data->vq_ctx[i].queue_ready_notified); + + if (vhost_queue_ready(dev, i) && data->queue_ready_cb.cb && !queue_ready_notified) { + data->queue_ready_cb.cb(dev, i, data->queue_ready_cb.data); + atomic_set(&data->vq_ctx[i].queue_ready_notified, 1); + } + } +} + +static void init_workhandler(struct k_work *work) +{ + struct k_work_delayable *delayable = k_work_delayable_from_work(work); + struct vhost_xen_mmio_data *data = + CONTAINER_OF(delayable, struct vhost_xen_mmio_data, init_work); + const struct device *dev = data->dev; + const struct vhost_xen_mmio_config *config = dev->config; + char baseaddr[HEX_64BIT_DIGITS + 3]; /* add rooms for "0x" and '\0' */ + uint32_t n_frms = 1; + xen_pfn_t gfn = 0; + mm_reg_t va; + int ret; + + if (atomic_get(&data->initialized)) { + return; + } + + /* + * Using the settings obtained from xenstore can only be checked for + * matching the base value. + * This means that if multiple FEs try to connect to a BE using the same base address, + * they cannot be matched correctly. + */ + + snprintf(baseaddr, sizeof(baseaddr), "0x%lx", config->base); + + struct query_param params[] = {{ + .key = "base", + .expected = baseaddr, + }}; + + ret = query_virtio_backend(params, ARRAY_SIZE(params), &data->fe.domid, &data->fe.deviceid); + if (ret < 0) { + LOG_INF("%s: failed %d", __func__, ret); + goto retry; + } + + data->fe.base = config->base; + data->fe.irq = query_irq(data->fe.domid, data->fe.deviceid); + if (data->fe.irq == -1) { + ret = -EINVAL; + goto retry; + } + + LOG_DBG("%u %u %lu", data->fe.domid, data->fe.irq, data->fe.base); + + ret = dmop_nr_vcpus(data->fe.domid); + if (ret < 0) { + LOG_ERR("dmop_nr_vcpus err=%d", ret); + goto retry; + } + data->vcpus = ret; + + ret = dmop_create_ioreq_server(data->fe.domid, HVM_IOREQSRV_BUFIOREQ_OFF, &data->fe.servid); + if (ret < 0) { + LOG_ERR("dmop_create_ioreq_server err=%d", ret); + goto retry; + } + + ret = dmop_map_io_range_to_ioreq_server(data->fe.domid, data->fe.servid, 1, data->fe.base, + data->fe.base + config->reg_size - 1); + if (ret < 0) { + LOG_ERR("dmop_map_io_range_to_ioreq_server err=%d", ret); + goto retry; + } + + ret = xendom_acquire_resource(data->fe.domid, XENMEM_resource_ioreq_server, data->fe.servid, + XENMEM_resource_ioreq_server_frame_ioreq(0), &n_frms, &gfn); + if (ret < 0) { + LOG_ERR("xendom_acquire_resource err=%d", ret); + goto retry; + } + + device_map(&va, (gfn << XEN_PAGE_SHIFT), (n_frms << XEN_PAGE_SHIFT), K_MEM_CACHE_NONE); + data->shared_iopage = (void *)va; + + ret = dmop_set_ioreq_server_state(data->fe.domid, data->fe.servid, 1); + if (ret) { + LOG_ERR("dmop_set_ioreq_server_state err=%d", ret); + goto retry; + } + + LOG_DBG("bind_interdomain dom=%d remote_port=%d", data->fe.domid, + data->shared_iopage->vcpu_ioreq[0].vp_eport); + + /* Assume that all interrupts are accepted by cpu0. 
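+ * Consequently only the ioreq event channel published in vcpu_ioreq[0] + * (vp_eport) is bound below.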
*/ + ret = bind_interdomain_event_channel(data->fe.domid, + data->shared_iopage->vcpu_ioreq[0].vp_eport, + bind_interdomain_nop, NULL); + if (ret < 0) { + LOG_ERR("EVTCHNOP_bind_interdomain[0] err=%d", ret); + goto retry; + } + data->ioserv_port = ret; + + bind_event_channel(data->ioserv_port, ioreq_server_cb, (void *)dev); + unmask_event_channel(data->ioserv_port); + + LOG_INF("%s: backend ready base=%zx fe.domid=%d irq=%d vcpus=%d shared_iopage=%p " + "ioserv_port=%d", + dev->name, data->fe.base, data->fe.domid, data->fe.irq, data->vcpus, + data->shared_iopage, data->ioserv_port); + + atomic_set(&data->initialized, 1); + + ret = 0; + +retry: + if (ret < 0) { + const uint32_t retry_count = MIN(RETRY_BACKOFF_EXP_MAX, atomic_inc(&data->retry)); + + reset_device(dev); + k_work_schedule_for_queue(&data->workq, &data->init_work, + K_MSEC(RETRY_DELAY_BASE_MS * (1 << retry_count))); + } + + LOG_INF("exit work_inithandler: %d", ret); +} + +static bool vhost_xen_mmio_virtq_is_ready(const struct device *dev, uint16_t queue_id) +{ + const struct vhost_xen_mmio_config *config = dev->config; + const struct vhost_xen_mmio_data *data = dev->data; + const struct virtq_context *vq_ctx = &data->vq_ctx[queue_id]; + const size_t queue_size = atomic_get(&vq_ctx->queue_size); + + if (queue_id >= config->num_queues) { + LOG_ERR_Q("Invalid queue ID"); + return false; + } + + if (queue_size == 0) { + return false; + } + + struct mapped_pages_chunk *meta = &vq_ctx->pages_chunks[META_PAGES_INDEX(config)]; + + if (!meta->map || meta->count != NUM_OF_VIRTQ_PARTS) { + return false; + } + + for (size_t i = 0; i < meta->count; i++) { + if (meta->map[i].buf == NULL) { + return false; + } + + for (size_t j = 0; j < meta->map[i].map_count; j++) { + if (meta->map[i].ops[j].status != GNTST_okay) { + return false; + } + } + } + + return vq_ctx->virtq_parts_gpa[VIRTQ_DESC] != 0 && + vq_ctx->virtq_parts_gpa[VIRTQ_AVAIL] != 0 && + vq_ctx->virtq_parts_gpa[VIRTQ_USED] != 0; +} + +static int vhost_xen_mmio_get_virtq(const struct device *dev, uint16_t queue_id, void **parts, + size_t *queue_size) +{ + const struct vhost_xen_mmio_config *config = dev->config; + const struct vhost_xen_mmio_data *data = dev->data; + const struct virtq_context *vq_ctx = &data->vq_ctx[queue_id]; + + if (queue_id >= config->num_queues) { + LOG_ERR_Q("Invalid queue ID"); + return -EINVAL; + } + + if (!vhost_xen_mmio_virtq_is_ready(dev, queue_id)) { + LOG_ERR_Q("not ready"); + return -ENODEV; + } + + struct mapped_pages_chunk *meta = &vq_ctx->pages_chunks[META_PAGES_INDEX(config)]; + + if (!meta->map || meta->count != NUM_OF_VIRTQ_PARTS) { + return -EINVAL; + } + + for (size_t i = 0; i < meta->count; i++) { + parts[i] = meta->map[i].buf + (vq_ctx->virtq_parts_gpa[i] & (XEN_PAGE_SIZE - 1)); + } + + if (!parts[VIRTQ_DESC] || !parts[VIRTQ_AVAIL] || !parts[VIRTQ_USED]) { + LOG_ERR_Q("failed to get ring base addresses"); + return -EINVAL; + } + + *queue_size = atomic_get(&vq_ctx->queue_size); + + LOG_DBG_Q("rings desc=%p, avail=%p, used=%p, size=%zu", parts[VIRTQ_DESC], + parts[VIRTQ_AVAIL], parts[VIRTQ_USED], *queue_size); + + return 0; +} + +static int vhost_xen_mmio_get_driver_features(const struct device *dev, uint64_t *drv_feats) +{ + const struct vhost_xen_mmio_data *data = dev->data; + + *drv_feats = data->be.driver_features; + + return 0; +} + +static int vhost_xen_mmio_notify_virtq(const struct device *dev, uint16_t queue_id) +{ + const struct vhost_xen_mmio_config *config = dev->config; + struct vhost_xen_mmio_data *data = dev->data; + + if (queue_id 
>= config->num_queues) { + LOG_ERR_Q("Invalid queue ID"); + return -EINVAL; + } + + atomic_or(&data->be.irq_status, VIRTIO_QUEUE_INTERRUPT); + + dmop_set_irq_level(data->fe.domid, data->fe.irq, 1); + dmop_set_irq_level(data->fe.domid, data->fe.irq, 0); + + return 0; +} + +static int vhost_xen_mmio_set_device_status(const struct device *dev, uint32_t status) +{ + struct vhost_xen_mmio_data *data = dev->data; + + if (!data) { + return -EINVAL; + } + + atomic_or(&data->be.status, status); + atomic_or(&data->be.irq_status, VIRTIO_DEVICE_CONFIGURATION_INTERRUPT); + + dmop_set_irq_level(data->fe.domid, data->fe.irq, 1); + dmop_set_irq_level(data->fe.domid, data->fe.irq, 0); + + return 0; +} + +static int vhost_xen_mmio_release_iovec(const struct device *dev, uint16_t queue_id, uint16_t head) +{ + const struct vhost_xen_mmio_config *config = dev->config; + struct vhost_xen_mmio_data *data = dev->data; + struct virtq_context *vq_ctx = &data->vq_ctx[queue_id]; + const size_t queue_size = atomic_get(&vq_ctx->queue_size); + int ret = 0; + + k_spinlock_key_t key = k_spin_lock(&vq_ctx->lock); + + if (queue_id >= config->num_queues) { + LOG_ERR_Q("Invalid queue ID"); + k_spin_unlock(&vq_ctx->lock, key); + return -EINVAL; + } + + if (head >= queue_size) { + LOG_ERR_Q("Invalid head: head=%u >= queue_size=%zu", head, queue_size); + k_spin_unlock(&vq_ctx->lock, key); + return -EINVAL; + } + + struct mapped_pages_chunk *chunk = &vq_ctx->pages_chunks[head]; + + if (!chunk->map || chunk->count == 0) { + LOG_ERR_Q("Head not in use: head=%u", head); + + k_spin_unlock(&vq_ctx->lock, key); + return -EINVAL; + } + + key = wait_for_chunk_ready(vq_ctx, chunk, key); + + chunk->releasing = true; + + k_spin_unlock(&vq_ctx->lock, key); + + for (size_t i = 0; i < chunk->count; i++) { + int rc = unmap_pages(&chunk->map[i]); + + if (rc < 0) { + LOG_ERR_Q("gnttab_unmap_refs failed for page %zu: %d", i, rc); + ret = rc; + } + } + + key = k_spin_lock(&vq_ctx->lock); + + for (size_t i = 0; i < chunk->count; i++) { + chunk->map[i].map_count = 0; + } + + chunk->releasing = false; + k_spin_unlock(&vq_ctx->lock, key); + + return ret; +} + +static int vhost_xen_mmio_prepare_iovec(const struct device *dev, uint16_t queue_id, uint16_t head, + const struct vhost_buf *bufs, size_t bufs_count, + struct vhost_iovec *r_iovecs, size_t r_iovecs_count, + struct vhost_iovec *w_iovecs, size_t w_iovecs_count, + size_t *read_count, size_t *write_count) +{ + const struct vhost_xen_mmio_config *config = dev->config; + struct vhost_xen_mmio_data *data = dev->data; + struct virtq_context *vq_ctx = &data->vq_ctx[queue_id]; + const size_t queue_size = atomic_get(&vq_ctx->queue_size); + size_t total_pages = 0; + int ret = 0; + + if (queue_id >= config->num_queues) { + LOG_ERR_Q("Invalid queue ID"); + ret = -EINVAL; + goto end; + } + + if (head >= queue_size) { + LOG_ERR_Q("Invalid head: head=%u >= queue_size=%zu", head, queue_size); + ret = -EINVAL; + goto end; + } + + for (size_t i = 0; i < bufs_count; i++) { + const uint64_t start_page = bufs[i].gpa >> XEN_PAGE_SHIFT; + const uint64_t end_page = (bufs[i].gpa + bufs[i].len - 1) >> XEN_PAGE_SHIFT; + + if (!(bufs[i].gpa & XEN_GRANT_ADDR_OFF)) { + LOG_ERR_Q("addr missing grant marker: 0x%" PRIx64, bufs[i].gpa); + ret = -EINVAL; + goto end; + } + + total_pages += end_page - start_page + 1; + } + + if (total_pages == 0) { + *read_count = 0; + *write_count = 0; + + return 0; + } + + LOG_DBG_Q("bufs_count=%zu total_pages=%zu max_read=%zu max_write=%zu", bufs_count, + total_pages, r_iovecs_count, 
w_iovecs_count); + + struct mapped_pages_chunk *chunk = &vq_ctx->pages_chunks[head]; + + if (chunk->map && chunk->count > 0 && chunk->map[0].map_count > 0) { + LOG_WRN("Found unreleased head: %d", head); + + ret = vhost_xen_mmio_release_iovec(dev, queue_id, head); + if (ret < 0) { + LOG_ERR_Q("vhost_xen_mmio_release_iovec: failed %d", ret); + goto end; + } + } + + ret = init_pages_chunks(dev, queue_id, head, bufs, bufs_count, r_iovecs, r_iovecs_count, + w_iovecs, w_iovecs_count, read_count, write_count, total_pages); + +end: + if (ret < 0) { + *read_count = 0; + *write_count = 0; + } + + return ret; +} + +static int vhost_xen_mmio_register_virtq_ready_cb(const struct device *dev, + void (*callback)(const struct device *dev, + uint16_t queue_id, + void *user_data), + void *user_data) +{ + const struct vhost_xen_mmio_config *config = dev->config; + struct vhost_xen_mmio_data *data = dev->data; + + data->queue_ready_cb.cb = callback; + data->queue_ready_cb.data = user_data; + + for (size_t i = 0; i < config->num_queues; i++) { + atomic_set(&data->vq_ctx[i].queue_ready_notified, 0); + } + + k_work_schedule_for_queue(&data->workq, &data->ready_work, K_NO_WAIT); + + return 0; +} + +static int vhost_xen_mmio_register_virtq_notify_cb(const struct device *dev, uint16_t queue_id, + void (*callback)(const struct device *dev, + uint16_t queue_id, + void *user_data), + void *user_data) +{ + const struct vhost_xen_mmio_config *config = dev->config; + struct vhost_xen_mmio_data *data = dev->data; + struct virtq_context *vq_ctx = &data->vq_ctx[queue_id]; + + if (queue_id >= config->num_queues) { + LOG_ERR_Q("Invalid queue ID"); + return -EINVAL; + } + + k_spinlock_key_t key = k_spin_lock(&vq_ctx->lock); + + vq_ctx->queue_notify_cb.cb = callback; + vq_ctx->queue_notify_cb.data = user_data; + + k_spin_unlock(&vq_ctx->lock, key); + + return 0; +} + +static const struct vhost_controller_api vhost_driver_xen_mmio_api = { + .virtq_is_ready = vhost_xen_mmio_virtq_is_ready, + .get_virtq = vhost_xen_mmio_get_virtq, + .get_driver_features = vhost_xen_mmio_get_driver_features, + .set_device_status = vhost_xen_mmio_set_device_status, + .notify_virtq = vhost_xen_mmio_notify_virtq, + .prepare_iovec = vhost_xen_mmio_prepare_iovec, + .release_iovec = vhost_xen_mmio_release_iovec, + .register_virtq_ready_cb = vhost_xen_mmio_register_virtq_ready_cb, + .register_virtq_notify_cb = vhost_xen_mmio_register_virtq_notify_cb, +}; + +static int vhost_xen_mmio_init(const struct device *dev) +{ + const struct k_work_queue_config qcfg = {.name = "vhost-mmio-wq"}; + const struct vhost_xen_mmio_config *config = dev->config; + struct vhost_xen_mmio_data *data = dev->data; + char buf[65] = {0}; + int ret; + + data->dev = dev; + + atomic_set(&data->initialized, 0); + atomic_set(&data->retry, 0); + atomic_set(&data->be.status, 0); + atomic_set(&data->be.queue_sel, 0); + atomic_set(&data->be.irq_status, 0); + atomic_set(&data->notify_queue_id, 0); + xen_events_init(); + + ret = xs_init(); + + if (ret < 0) { + LOG_INF("xs_init_xenstore failed: %d", ret); + return ret; + } + + k_work_init_delayable(&data->init_work, init_workhandler); + k_work_init_delayable(&data->isr_work, isr_workhandler); + k_work_init_delayable(&data->ready_work, ready_workhandler); + k_work_queue_init(&data->workq); + k_work_queue_start(&data->workq, config->workq_stack, config->workq_stack_size, + config->workq_priority, &qcfg); + + xs_watcher_init(&data->watcher, xs_notify_handler, (void *)dev); + xs_watcher_register(&data->watcher); + + ret = 
xs_watch("backend/virtio", dev->name, buf, ARRAY_SIZE(buf) - 1, 0); + + if (ret < 0) { + LOG_INF("xs_watch failed: %d", ret); + return ret; + } + + return 0; +} + +#define VQCTX_INIT(n, idx) \ + { \ + .pages_chunks = vhost_xen_mmio_pages_chunks_##idx[n], \ + } + +#define Q_NUM(idx) DT_INST_PROP_OR(idx, num_queues, 1) +#define Q_SZ_MAX(idx) DT_INST_PROP_OR(idx, queue_size_max, 1) + +#define VHOST_XEN_MMIO_INST(idx) \ + static K_THREAD_STACK_DEFINE(workq_stack_##idx, DT_INST_PROP_OR(idx, stack_size, 4096)); \ + static const struct vhost_xen_mmio_config vhost_xen_mmio_config_##idx = { \ + .queue_size_max = DT_INST_PROP_OR(idx, queue_size_max, 1), \ + .num_queues = DT_INST_PROP_OR(idx, num_queues, 1), \ + .device_id = DT_INST_PROP(idx, device_id), \ + .vendor_id = DT_INST_PROP_OR(idx, vendor_id, 0), \ + .base = DT_INST_PROP(idx, base), \ + .reg_size = XEN_PAGE_SIZE, \ + .workq_stack = (k_thread_stack_t *)&workq_stack_##idx, \ + .workq_stack_size = K_THREAD_STACK_SIZEOF(workq_stack_##idx), \ + .workq_priority = DT_INST_PROP_OR(idx, priority, 0), \ + .device_features = BIT(VIRTIO_F_VERSION_1) | BIT(VIRTIO_F_ACCESS_PLATFORM), \ + }; \ + struct mapped_pages_chunk vhost_xen_mmio_pages_chunks_##idx[Q_NUM(idx)] \ + [Q_SZ_MAX(idx) + 1]; \ + static struct virtq_context vhost_xen_mmio_vq_ctx_##idx[Q_NUM(idx)] = { \ + LISTIFY(Q_NUM(idx), VQCTX_INIT, (,), idx), \ + }; \ + static struct vhost_xen_mmio_data vhost_xen_mmio_data_##idx = { \ + .vq_ctx = vhost_xen_mmio_vq_ctx_##idx, \ + .fe.base = -1, \ + .ioserv_port = -1, \ + .fe.servid = -1, \ + }; \ + DEVICE_DT_INST_DEFINE(idx, vhost_xen_mmio_init, NULL, &vhost_xen_mmio_data_##idx, \ + &vhost_xen_mmio_config_##idx, POST_KERNEL, 100, \ + &vhost_driver_xen_mmio_api); + +DT_INST_FOREACH_STATUS_OKAY(VHOST_XEN_MMIO_INST) diff --git a/dts/bindings/vhost/xen,virtio-mmio.yaml b/dts/bindings/vhost/xen,virtio-mmio.yaml new file mode 100644 index 0000000000000..89f5d86eb9daf --- /dev/null +++ b/dts/bindings/vhost/xen,virtio-mmio.yaml @@ -0,0 +1,39 @@ +# Copyright (c) 2025 TOKITA Hiroshi +# SPDX-License-Identifier: Apache-2.0 + +description: Xen vhost MMIO backend exposing a VirtIO MMIO device to a guest frontend domain + +compatible: "xen,vhost-mmio" + +include: base.yaml + +properties: + base: + type: int + required: true + + num-queues: + type: int + required: true + + queue-size-max: + type: int + required: true + + device-id: + type: int + required: true + + vendor-id: + type: int + default: 0 + + stack-size: + type: int + description: > + Stack size (in bytes) for the instance-specific work_q thread. + + priority: + type: int + description: > + Priority for the instance-specific work_q thread. From 3641e2d9e7bf0415e01d63023d86cd99222e2c26 Mon Sep 17 00:00:00 2001 From: TOKITA Hiroshi Date: Sat, 17 May 2025 16:06:15 +0900 Subject: [PATCH 13/14] samples: drivers: virtualization: add vhost sample application Add sample application demonstrating vhost driver usage for VIRTIO backend implementations. Includes basic setup and configuration examples for Xen MMIO VirtIO backend.
Signed-off-by: TOKITA Hiroshi --- .../virtualization/vhost/CMakeLists.txt | 8 ++ .../drivers/virtualization/vhost/README.rst | 52 +++++++++ .../virtualization/vhost/boards/xenvm.overlay | 16 +++ .../vhost/boards/xenvm_xenvm_gicv3.overlay | 7 ++ samples/drivers/virtualization/vhost/prj.conf | 6 + .../drivers/virtualization/vhost/sample.yaml | 20 ++++ .../drivers/virtualization/vhost/src/main.c | 109 ++++++++++++++++++ 7 files changed, 218 insertions(+) create mode 100644 samples/drivers/virtualization/vhost/CMakeLists.txt create mode 100644 samples/drivers/virtualization/vhost/README.rst create mode 100644 samples/drivers/virtualization/vhost/boards/xenvm.overlay create mode 100644 samples/drivers/virtualization/vhost/boards/xenvm_xenvm_gicv3.overlay create mode 100644 samples/drivers/virtualization/vhost/prj.conf create mode 100644 samples/drivers/virtualization/vhost/sample.yaml create mode 100644 samples/drivers/virtualization/vhost/src/main.c diff --git a/samples/drivers/virtualization/vhost/CMakeLists.txt b/samples/drivers/virtualization/vhost/CMakeLists.txt new file mode 100644 index 0000000000000..ceb1058eff75a --- /dev/null +++ b/samples/drivers/virtualization/vhost/CMakeLists.txt @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: Apache-2.0 + +cmake_minimum_required(VERSION 3.20.0) + +find_package(Zephyr REQUIRED HINTS $ENV{ZEPHYR_BASE}) +project(vhost) + +target_sources(app PRIVATE src/main.c) diff --git a/samples/drivers/virtualization/vhost/README.rst b/samples/drivers/virtualization/vhost/README.rst new file mode 100644 index 0000000000000..dfaf59575c782 --- /dev/null +++ b/samples/drivers/virtualization/vhost/README.rst @@ -0,0 +1,52 @@ +.. zephyr:code-sample:: vhost + :name: Vhost sample application + +Overview +******** + +This sample demonstrates the use of the vhost driver subsystem for implementing +VIRTIO backends in Zephyr. The application shows how to: + +* Initialize and configure vhost devices +* Handle VIRTIO queue operations +* Process guest requests using the vringh utility +* Implement a basic VIRTIO backend for Xen virtualization + +The sample sets up a vhost device that can communicate with VIRTIO frontend +drivers in guest virtual machines, specifically designed for Xen MMIO +virtualization environments. + +Requirements +************ + +This sample requires: + +* A Xen hypervisor environment +* Xen domain management tools +* A board that supports Xen virtualization (e.g., xenvm) + +Building and Running +******************** + +This application can be built and executed on Xen as follows: + +.. zephyr-app-commands:: + :zephyr-app: samples/drivers/virtualization/vhost + :host-os: unix + :board: xenvm + :goals: run + :compact: + +The application will initialize the vhost subsystem and wait for VIRTIO +frontend connections from guest domains. When a guest connects and sends +requests, the sample will process them and provide responses. + +Expected Output +*************** + +When running successfully, you should see output similar to:: + + *** Booting Zephyr OS build zephyr-v3.x.x *** + [00:00:00.000,000] vhost: VHost device ready + [00:00:00.000,000] vhost: queue_ready_handler(dev=0x..., qid=0, data=0x...) 
+ [00:00:00.000,000] vhost: vringh_kick_handler: queue_id=0 diff --git a/samples/drivers/virtualization/vhost/boards/xenvm.overlay b/samples/drivers/virtualization/vhost/boards/xenvm.overlay new file mode 100644 index 0000000000000..31e6bbf596795 --- /dev/null +++ b/samples/drivers/virtualization/vhost/boards/xenvm.overlay @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2025 TOKITA Hiroshi + * + * SPDX-License-Identifier: Apache-2.0 + */ + +/ { + vhost: xen-vhost-mmio { + compatible = "xen,vhost-mmio"; + device-id = <4>; + vendor-id = <0x7a707972>; /* "zphy" */ + num-queues = <2>; + queue-size-max= <16>; + base = <0x2000000>; + }; +}; diff --git a/samples/drivers/virtualization/vhost/boards/xenvm_xenvm_gicv3.overlay b/samples/drivers/virtualization/vhost/boards/xenvm_xenvm_gicv3.overlay new file mode 100644 index 0000000000000..363550cc41acd --- /dev/null +++ b/samples/drivers/virtualization/vhost/boards/xenvm_xenvm_gicv3.overlay @@ -0,0 +1,7 @@ +/* + * Copyright (c) 2025 TOKITA Hiroshi + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "xenvm.overlay" diff --git a/samples/drivers/virtualization/vhost/prj.conf b/samples/drivers/virtualization/vhost/prj.conf new file mode 100644 index 0000000000000..71a1adb90e2af --- /dev/null +++ b/samples/drivers/virtualization/vhost/prj.conf @@ -0,0 +1,6 @@ +CONFIG_VHOST=y +CONFIG_LOG=y +CONFIG_HEAP_MEM_POOL_SIZE=8388608 +CONFIG_LOG_MODE_IMMEDIATE=y +CONFIG_LOG_DEFAULT_LEVEL=3 +CONFIG_VHOST_LOG_LEVEL_DBG=y diff --git a/samples/drivers/virtualization/vhost/sample.yaml b/samples/drivers/virtualization/vhost/sample.yaml new file mode 100644 index 0000000000000..e55f344cecb99 --- /dev/null +++ b/samples/drivers/virtualization/vhost/sample.yaml @@ -0,0 +1,20 @@ +sample: + description: VHost driver sample for VIRTIO backend implementation + name: vhost +common: + integration_platforms: + - xenvm + platform_allow: xenvm + harness: console + harness_config: + type: multi_line + regex: + - "VHost device ready" + - "VHost sample application started" + timeout: 60 + depends_on: vhost +tests: + sample.drivers.virtualization.vhost: + tags: drivers virtualization vhost xen + min_ram: 32 + extra_args: CONFIG_LOG_MODE_IMMEDIATE=y diff --git a/samples/drivers/virtualization/vhost/src/main.c b/samples/drivers/virtualization/vhost/src/main.c new file mode 100644 index 0000000000000..7895d5c38f83d --- /dev/null +++ b/samples/drivers/virtualization/vhost/src/main.c @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2025 TOKITA Hiroshi + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include + +LOG_MODULE_REGISTER(vhost); + +struct vhost_iovec riovec[16]; +struct vhost_iovec wiovec[16]; + +struct vringh_iov riov = { + .iov = riovec, + .max_num = 16, +}; + +struct vringh_iov wiov = { + .iov = wiovec, + .max_num = 16, +}; + +struct vringh vrh_inst; + +static void vringh_kick_handler(struct vringh *vrh) +{ + LOG_DBG("%s: queue_id=%lu", __func__, vrh->queue_id); + uint16_t head; + + while (true) { + int ret = vringh_getdesc(vrh, &riov, &wiov, &head); + + if (ret < 0) { + LOG_ERR("vringh_getdesc failed: %d", ret); + return; + } + + if (ret == 0) { + return; + } + + /* Process writable iovecs */ + for (uint32_t s = 0; s < wiov.used; s++) { + uint8_t *dst = wiov.iov[s].iov_base; + uint32_t len = wiov.iov[s].iov_len; + + LOG_DBG("%s: addr=%p len=%u", __func__, dst, len); + + for (uint32_t i = 0; i < len; i++) { + sys_write8(i, (mem_addr_t)&dst[i]); + } + } + + barrier_dmem_fence_full(); + + uint32_t total_len = 0; + + for (uint32_t i = 0; i < wiov.used; i++) { + 
total_len += wiov.iov[i].iov_len; + } + + vringh_complete(vrh, head, total_len); + + if (vringh_need_notify(vrh) > 0) { + vringh_notify(vrh); + } + + /* Reset iovecs for next iteration */ + vringh_iov_reset(&riov); + vringh_iov_reset(&wiov); + } +} + +void queue_ready_handler(const struct device *dev, uint16_t qid, void *data) +{ + LOG_DBG("%s(dev=%p, qid=%u, data=%p)", __func__, dev, qid, data); + + /* Initialize iovecs before descriptor processing */ + vringh_iov_init(&riov, riov.iov, riov.max_num); + vringh_iov_init(&wiov, wiov.iov, wiov.max_num); + + int err = vringh_init_device(&vrh_inst, dev, qid, vringh_kick_handler); + + if (err) { + LOG_ERR("vringh_init_device failed: %d", err); + return; + } +} + +int main(void) +{ + const struct device *device = DEVICE_DT_GET(DT_NODELABEL(vhost)); + + if (!device_is_ready(device)) { + LOG_ERR("VHost device not ready"); + return -ENODEV; + } + + LOG_INF("VHost device ready"); + vhost_register_virtq_ready_cb(device, queue_ready_handler, (void *)device); + + LOG_INF("VHost sample application started, waiting for guest connections..."); + k_sleep(K_FOREVER); + + return 0; +} From e9a424aaa29e87935a07cdbd101f961e2bd398cd Mon Sep 17 00:00:00 2001 From: TOKITA Hiroshi Date: Sat, 15 Nov 2025 08:01:50 +0900 Subject: [PATCH 14/14] Support VIRTIO_RING_F_EVENT_IDX --- drivers/virtio/virtio_common.c | 19 ++++++--- drivers/virtio/virtio_mmio.c | 48 ++++++++++++--------- drivers/virtio/virtio_pci.c | 52 +++++++++++++---------- drivers/virtio/virtqueue.c | 28 ++++++++++-- include/zephyr/drivers/virtio/virtqueue.h | 36 +++++++++++++++- 5 files changed, 130 insertions(+), 53 deletions(-) diff --git a/drivers/virtio/virtio_common.c b/drivers/virtio/virtio_common.c index 44012370239d0..a6d6ae1dd1fc1 100644 --- a/drivers/virtio/virtio_common.c +++ b/drivers/virtio/virtio_common.c @@ -55,15 +55,20 @@ void virtio_isr(const struct device *dev, uint8_t isr_status, uint16_t virtqueue next = vq->desc[curr_le].next; last = !(vq->desc[curr_le].flags & VIRTQ_DESC_F_NEXT); virtq_add_free_desc(vq, curr); - } + } - vq->last_used_idx++; + vq->last_used_idx++; - if (cbe.cb) { - cbe.cb(cbe.opaque, used_len); - } - } - } + if (vq->event_idx_enabled) { + *vq->used_event = sys_cpu_to_le16(vq->last_used_idx); + barrier_dmem_fence_full(); + } + + if (cbe.cb) { + cbe.cb(cbe.opaque, used_len); + } + } + } } if (isr_status & VIRTIO_DEVICE_CONFIGURATION_INTERRUPT) { LOG_ERR("device configuration change interrupt is currently unsupported"); diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index f9ad7070ec60b..f1b27c5a094f8 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -26,13 +26,14 @@ LOG_MODULE_REGISTER(virtio_mmio, CONFIG_VIRTIO_LOG_LEVEL); #define DEV_DATA(dev) ((struct virtio_mmio_data *)(dev)->data) struct virtio_mmio_data { - DEVICE_MMIO_NAMED_RAM(reg_base); + DEVICE_MMIO_NAMED_RAM(reg_base); - struct virtq *virtqueues; - uint16_t virtqueue_count; + struct virtq *virtqueues; + uint16_t virtqueue_count; + bool event_idx_enabled; - struct k_spinlock isr_lock; - struct k_spinlock notify_lock; + struct k_spinlock isr_lock; + struct k_spinlock notify_lock; }; struct virtio_mmio_config { @@ -220,16 +221,18 @@ static int virtio_mmio_set_virtqueues(const struct device *dev, uint16_t queue_c const uint16_t queue_size = cb(i, virtio_mmio_read32(dev, VIRTIO_MMIO_QUEUE_SIZE_MAX), opaque); - ret = virtq_create(&data->virtqueues[i], queue_size); - if (ret != 0) { - goto fail; - } - created_queues++; + ret = virtq_create(&data->virtqueues[i], 
queue_size); + if (ret != 0) { + goto fail; + } + created_queues++; - ret = virtio_mmio_set_virtqueue(dev, i, &data->virtqueues[i]); - if (ret != 0) { - goto fail; - } + virtq_enable_event_idx(&data->virtqueues[i], data->event_idx_enabled); + + ret = virtio_mmio_set_virtqueue(dev, i, &data->virtqueues[i]); + if (ret != 0) { + goto fail; + } activated_queues++; } @@ -273,7 +276,9 @@ static DEVICE_API(virtio, virtio_mmio_driver_api) = { static int virtio_mmio_init_common(const struct device *dev) { - DEVICE_MMIO_NAMED_MAP(dev, reg_base, K_MEM_CACHE_NONE); + struct virtio_mmio_data *data = dev->data; + + DEVICE_MMIO_NAMED_MAP(dev, reg_base, K_MEM_CACHE_NONE); const uint32_t magic = virtio_mmio_read32(dev, VIRTIO_MMIO_MAGIC_VALUE); @@ -300,12 +305,17 @@ static int virtio_mmio_init_common(const struct device *dev) virtio_mmio_reset(dev); - virtio_mmio_write_status_bit(dev, DEVICE_STATUS_ACKNOWLEDGE); - virtio_mmio_write_status_bit(dev, DEVICE_STATUS_DRIVER); + virtio_mmio_write_status_bit(dev, DEVICE_STATUS_ACKNOWLEDGE); + virtio_mmio_write_status_bit(dev, DEVICE_STATUS_DRIVER); - virtio_mmio_write_driver_feature_bit(dev, VIRTIO_F_VERSION_1, true); + virtio_mmio_write_driver_feature_bit(dev, VIRTIO_F_VERSION_1, true); - return 0; + data->event_idx_enabled = virtio_mmio_read_device_feature_bit(dev, VIRTIO_RING_F_EVENT_IDX); + if (data->event_idx_enabled) { + virtio_mmio_write_driver_feature_bit(dev, VIRTIO_RING_F_EVENT_IDX, true); + } + + return 0; }; #define VIRTIO_MMIO_DEFINE(inst) \ diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index 7b7000b9475c2..45f332542d061 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -83,17 +83,18 @@ struct virtio_pci_common_cfg { #define VIRTIO_PCI_MSIX_NO_VECTOR 0xffff struct virtio_pci_data { - volatile struct virtio_pci_common_cfg *common_cfg; - void *device_specific_cfg; - volatile uint8_t *isr_status; - volatile uint8_t *notify_cfg; - uint32_t notify_off_multiplier; - - struct virtq *virtqueues; - uint16_t virtqueue_count; - - struct k_spinlock isr_lock; - struct k_spinlock notify_lock; + volatile struct virtio_pci_common_cfg *common_cfg; + void *device_specific_cfg; + volatile uint8_t *isr_status; + volatile uint8_t *notify_cfg; + uint32_t notify_off_multiplier; + + struct virtq *virtqueues; + uint16_t virtqueue_count; + bool event_idx_enabled; + + struct k_spinlock isr_lock; + struct k_spinlock notify_lock; }; struct virtio_pci_config { @@ -284,16 +285,18 @@ static int virtio_pci_init_virtqueues( uint16_t queue_size = cb(i, sys_le16_to_cpu(data->common_cfg->queue_size), opaque); - ret = virtq_create(&data->virtqueues[i], queue_size); - if (ret != 0) { - goto fail; - } - created_queues++; + ret = virtq_create(&data->virtqueues[i], queue_size); + if (ret != 0) { + goto fail; + } + created_queues++; - ret = virtio_pci_set_virtqueue(dev, i, &data->virtqueues[i]); - if (ret != 0) { - goto fail; - } + virtq_enable_event_idx(&data->virtqueues[i], data->event_idx_enabled); + + ret = virtio_pci_set_virtqueue(dev, i, &data->virtqueues[i]); + if (ret != 0) { + goto fail; + } activated_queues++; } @@ -534,9 +537,14 @@ static int virtio_pci_init_common(const struct device *dev) return 1; } - virtio_pci_write_driver_feature_bit(dev, VIRTIO_F_VERSION_1, 1); + virtio_pci_write_driver_feature_bit(dev, VIRTIO_F_VERSION_1, 1); - return 0; + data->event_idx_enabled = virtio_pci_read_device_feature_bit(dev, VIRTIO_RING_F_EVENT_IDX); + if (data->event_idx_enabled) { + virtio_pci_write_driver_feature_bit(dev, 
VIRTIO_RING_F_EVENT_IDX, 1); + } + + return 0; }; struct virtq *virtio_pci_get_virtqueue(const struct device *dev, uint16_t queue_idx) diff --git a/drivers/virtio/virtqueue.c b/drivers/virtio/virtqueue.c index 4ff78caa8f089..baa275489b111 100644 --- a/drivers/virtio/virtqueue.c +++ b/drivers/virtio/virtqueue.c @@ -55,7 +55,11 @@ int virtq_create(struct virtq *v, size_t size) v->desc = (struct virtq_desc *)v_area; v->avail = (struct virtq_avail *)((uint8_t *)v->desc + descriptor_table_size); v->used = (struct virtq_used *)((uint8_t *)v->avail + available_ring_size + used_ring_pad); - v->recv_cbs = (struct virtq_receive_callback_entry *)((uint8_t *)v->used + used_ring_size); + v->recv_cbs = (struct virtq_receive_callback_entry *)((uint8_t *)v->used + used_ring_size); + v->used_event = &v->avail->ring[size]; + v->avail_event = (uint16_t *)((uint8_t *)v->used->ring + + sizeof(struct virtq_used_elem) * size); + v->event_idx_enabled = false; /* * At the beginning of the descriptor table, the available ring and the used ring have to be @@ -183,6 +187,24 @@ int virtq_get_free_desc(struct virtq *v, uint16_t *desc_idx, k_timeout_t timeout void virtq_add_free_desc(struct virtq *v, uint16_t desc_idx) { - k_stack_push(&v->free_desc_stack, desc_idx); - v->free_desc_n++; + k_stack_push(&v->free_desc_stack, desc_idx); + v->free_desc_n++; +} + +void virtq_enable_event_idx(struct virtq *v, bool enable) +{ + v->event_idx_enabled = enable; + + if (enable) { + *v->used_event = sys_cpu_to_le16(v->last_used_idx); + } else { + *v->used_event = 0; + } + + barrier_dmem_fence_full(); +} + +bool virtq_is_event_idx_enabled(const struct virtq *v) +{ + return v->event_idx_enabled; } diff --git a/include/zephyr/drivers/virtio/virtqueue.h b/include/zephyr/drivers/virtio/virtqueue.h index 5df3e2f294c8b..5bfbe0244c0ee 100644 --- a/include/zephyr/drivers/virtio/virtqueue.h +++ b/include/zephyr/drivers/virtio/virtqueue.h @@ -6,8 +6,9 @@ #ifndef ZEPHYR_VIRTIO_VIRTQUEUE_H_ #define ZEPHYR_VIRTIO_VIRTQUEUE_H_ -#include #include +#include +#include #include #ifdef __cplusplus @@ -190,7 +191,22 @@ struct virtq { /** * array with callbacks invoked after receiving buffers back from the device */ - struct virtq_receive_callback_entry *recv_cbs; + struct virtq_receive_callback_entry *recv_cbs; + + /** + * pointer to used_event field, valid if @ref event_idx_enabled is true + */ + uint16_t *used_event; + + /** + * pointer to avail_event field, valid if @ref event_idx_enabled is true + */ + uint16_t *avail_event; + + /** + * indicates whether @ref VIRTIO_RING_F_EVENT_IDX was negotiated for the queue + */ + bool event_idx_enabled; }; @@ -267,6 +283,22 @@ void virtq_add_free_desc(struct virtq *v, uint16_t desc_idx); */ int virtq_get_free_desc(struct virtq *v, uint16_t *desc_idx, k_timeout_t timeout); +/** + * @brief Enables or disables @ref VIRTIO_RING_F_EVENT_IDX support for a queue. + * + * @param v virtqueue to update + * @param enable true when the feature was negotiated + */ +void virtq_enable_event_idx(struct virtq *v, bool enable); + +/** + * @brief Returns true when @ref VIRTIO_RING_F_EVENT_IDX is enabled. + * + * @param v virtqueue to query + * @return true if event index support is enabled + */ +bool virtq_is_event_idx_enabled(const struct virtq *v); + /** * @} */
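
For context on how the new avail_event field would typically be consumed, the sketch below shows the standard VIRTIO 1.x notification-suppression rule a transport driver could apply before kicking the device once buffers have been published. It is illustrative only and not part of the patch: virtq_need_notify() is a hypothetical helper name, old_avail_idx is the avail->idx value before the current batch of buffers was added, and the local VIRTQ_USED_F_NO_NOTIFY fallback definition is included only in case the virtqueue header does not already provide it. It assumes <zephyr/sys/byteorder.h> and the virtqueue header are included.

#ifndef VIRTQ_USED_F_NO_NOTIFY
#define VIRTQ_USED_F_NO_NOTIFY 1 /* value defined by the VIRTIO specification */
#endif

static inline bool virtq_need_notify(const struct virtq *v, uint16_t old_avail_idx)
{
	const uint16_t new_idx = sys_le16_to_cpu(v->avail->idx);

	if (virtq_is_event_idx_enabled(v)) {
		/* Device tells the driver at which avail index it wants to be kicked */
		const uint16_t event = sys_le16_to_cpu(*v->avail_event);

		return (uint16_t)(new_idx - event - 1) < (uint16_t)(new_idx - old_avail_idx);
	}

	/* Legacy suppression: device sets this flag when it does not need kicks */
	return !(sys_le16_to_cpu(v->used->flags) & VIRTQ_USED_F_NO_NOTIFY);
}

Once VIRTIO_RING_F_EVENT_IDX is negotiated, the device-written avail_event index replaces the coarse VIRTQ_USED_F_NO_NOTIFY flag, so the driver kicks only when the newly published entries cross the index the device asked to be woken at.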