Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding Hyper-V Nested Virt and Guest UEFI Support #729

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions recipes-extended/xen/files/fix-nesting-1.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
Index: xen-4.6.4/xen/arch/x86/hvm/vmx/vvmx.c
===================================================================
--- xen-4.6.4.orig/xen/arch/x86/hvm/vmx/vvmx.c
+++ xen-4.6.4/xen/arch/x86/hvm/vmx/vvmx.c
@@ -1914,6 +1914,7 @@ int nvmx_msr_read_intercept(unsigned int
/* 1-seetings */
data = PIN_BASED_EXT_INTR_MASK |
PIN_BASED_NMI_EXITING |
+ PIN_BASED_VIRTUAL_NMIS |
PIN_BASED_PREEMPT_TIMER;
data = gen_vmx_msr(data, VMX_PINBASED_CTLS_DEFAULT1, host_data);
break;
@@ -2278,8 +2279,10 @@ int nvmx_n2_vmexit_handler(struct cpu_us
if ( !++port )
nvcpu->nv_vmexit_pending = 1;
} while ( !nvcpu->nv_vmexit_pending );
+#if 0
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of just disabling this, why not test for DEBUG? That way it is easier to turn on when troubleshooting.

if ( !nvcpu->nv_vmexit_pending )
printk(XENLOG_G_WARNING "L0 PIO %04x\n", port);
+#endif
}
else if ( ctrl & CPU_BASED_UNCOND_IO_EXITING )
nvcpu->nv_vmexit_pending = 1;
151 changes: 151 additions & 0 deletions recipes-extended/xen/files/fix-nesting-2.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
Index: xen-4.6.4/xen/arch/x86/mm/p2m.c
===================================================================
--- xen-4.6.4.orig/xen/arch/x86/mm/p2m.c
+++ xen-4.6.4/xen/arch/x86/mm/p2m.c
@@ -2024,6 +2024,128 @@ p2m_get_p2m(struct vcpu *v)
return p2m_get_nestedp2m(v, nhvm_vcpu_p2m_base(v));
}

+
+
+/*
+ * Translate an L2 guest-physical address into an L1 guest frame number by
+ * calling nept_translate_l2ga() on the page containing l2_pa.
+ * Returns INVALID_GFN unless the translation reports EPT_TRANSLATE_SUCCEED.
+ *
+ * BUG: access 0 is used here, but EPT requires the RWX/US checks from pfec.
+ */
+static unsigned long paging_get_l1_pfn_from_l2_pa(struct vcpu *v, unsigned long l2_pa)
+{
+    unsigned int order;
+    unsigned long gfn;
+    uint8_t acc;
+    uint32_t reason;
+    uint64_t qual;
+    int rc = nept_translate_l2ga(v, l2_pa & PAGE_MASK, &order, 0, &gfn,
+                                 &acc, &qual, &reason);
+
+    return ( rc == EPT_TRANSLATE_SUCCEED ) ? gfn : INVALID_GFN;
+}
+
+
+/* Read the 64-bit paging entry at L2 guest-physical address l2_pa into
+ * *entry.  Returns 0 on success, 1 (with *entry = ~0ULL) on failure. */
+static int paging_read_l2_entry(struct vcpu *v, unsigned long l2_pa, uint64_t *entry)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please be consistent with spacing and honor spacing conventions of existing code you are patching.

+{
+    unsigned long l1_pfn = paging_get_l1_pfn_from_l2_pa(v, l2_pa);
+    void *page;
+
+    /* Poison the output up front so that every failure path agrees. */
+    *entry = ~0ULL;
+
+    if ( l1_pfn == INVALID_GFN )
+        return 1;
+
+    page = hvm_map_guest_frame_ro(l1_pfn, 0);
+    if ( !page )
+        return 1;
+
+    /* sizeof(*entry), not sizeof(pte): the latter was a pointer's size. */
+    memcpy(entry, page + (l2_pa & ~PAGE_MASK), sizeof(*entry));
+
+    hvm_unmap_guest_frame(page, 0);
+
+    return 0;
+}
+
+/*
+ * Translate L2 guest-virtual address l2_va into an L2 guest-physical
+ * address (*l2_pa) by walking the L2 guest's 4-level page tables from CR3.
+ * Returns 0 on success, 1 if an entry cannot be read or is not present.
+ * Limitations: long mode only; access permissions are not checked.
+ */
+static unsigned int paging_get_l2_pa_from_l2_va(struct vcpu *v, unsigned long l2_va, unsigned long *l2_pa)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Spacing here as well

+{
+    unsigned long l2_cr3 = v->arch.hvm_vcpu.guest_cr[3];
+    uint64_t pml4e_addr, pml4e;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nitpick: Is this extra white space needed?

+    uint64_t pdpte_addr, pdpte;
+    uint64_t pde_addr, pde;
+    uint64_t pte_addr, pte;
+
+    /* Each level: bits 51:12 of the parent entry plus a 9-bit index * 8. */
+    pml4e_addr = l2_cr3 & 0xffffffffff000ULL;
+    pml4e_addr |= (l2_va >> 36) & 0xff8;
+
+    /* Bit 0 is the present bit; a non-present entry ends the walk. */
+    if ( paging_read_l2_entry(v, pml4e_addr, &pml4e) || !(pml4e & 1) )
+        return 1;
+
+    pdpte_addr = pml4e & 0xffffffffff000ULL;
+    pdpte_addr |= (l2_va >> 27) & 0xff8;
+
+    if ( paging_read_l2_entry(v, pdpte_addr, &pdpte) || !(pdpte & 1) )
+        return 1;
+
+    /* Bit 7 (PS) set: 1GB page.  Bits 29:12 are not address bits here. */
+    if ( pdpte & 0x80 )
+    {
+        *l2_pa = pdpte & 0xfffffc0000000ULL;
+        *l2_pa |= l2_va & 0x3FFFFFFF;
+        return 0;
+    }
+
+    pde_addr = pdpte & 0xffffffffff000ULL;
+    pde_addr |= (l2_va >> 18) & 0xff8;
+
+    if ( paging_read_l2_entry(v, pde_addr, &pde) || !(pde & 1) )
+        return 1;
+
+    /* Bit 7 (PS) set: 2MB page.  Bits 20:12 are not address bits here. */
+    if ( pde & 0x80 )
+    {
+        *l2_pa = pde & 0xfffffffe00000ULL;
+        *l2_pa |= l2_va & 0x1FFFFF;
+        return 0;
+    }
+
+    pte_addr = pde & 0xffffffffff000ULL;
+    pte_addr |= (l2_va >> 9) & 0xff8;
+
+    if ( paging_read_l2_entry(v, pte_addr, &pte) || !(pte & 1) )
+        return 1;
+
+    *l2_pa = pte & 0xffffffffff000ULL;
+    *l2_pa |= l2_va & 0xFFF;
+    return 0;
+}
+
+
+/* Translate L2 guest-virtual address va to an L1 frame number, or return
+ * INVALID_GFN.  BUG: no access checks (EPT or L2 page tables); pfec unused. */
+static unsigned long paging_get_l1_pfn_from_l2_va(struct vcpu *v, unsigned long va, uint32_t *pfec)
+{
+    /* unsigned long to match paging_get_l2_pa_from_l2_va()'s out-parameter. */
+    unsigned long l2_pa;
+
+    if ( paging_get_l2_pa_from_l2_va(v, va, &l2_pa) )
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indentation

+        return INVALID_GFN;
+    return paging_get_l1_pfn_from_l2_pa(v, l2_pa);
+}
+
unsigned long paging_gva_to_gfn(struct vcpu *v,
unsigned long va,
uint32_t *pfec)
@@ -2033,6 +2155,7 @@ unsigned long paging_gva_to_gfn(struct v

if ( is_hvm_vcpu(v) && paging_mode_hap(v->domain) && nestedhvm_is_n2(v) )
{
+#if 0
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please don't just turn off code blocks; if you are replacing the code, then replace it. If it is being left to provide understanding, then add a comment within the block to let people know why it's still here.

unsigned long l2_gfn, l1_gfn;
struct p2m_domain *p2m;
const struct paging_mode *mode;
@@ -2066,6 +2189,9 @@ unsigned long paging_gva_to_gfn(struct v
(l1_gfn & ((1ul << l1_page_order) - 1)));

return l1_gfn;
+#else
+ return paging_get_l1_pfn_from_l2_va(v,va,pfec);
+#endif
}

return hostmode->gva_to_gfn(v, hostp2m, va, pfec);
37 changes: 37 additions & 0 deletions recipes-extended/xen/files/hvmloader-nvram.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
Index: xen-4.6.4/tools/firmware/hvmloader/ovmf.c
===================================================================
--- xen-4.6.4.orig/tools/firmware/hvmloader/ovmf.c
+++ xen-4.6.4/tools/firmware/hvmloader/ovmf.c
@@ -38,8 +38,14 @@
#include "roms.inc"

#define OVMF_SIZE (sizeof(ovmf))
+#define OVMF_VARS_SIZE 0x20000ULL
+#define OVMF_CODE_SIZE (OVMF_SIZE - OVMF_VARS_SIZE)
+
#define OVMF_MAXOFFSET 0x000FFFFFULL
#define OVMF_BEGIN (0x100000000ULL - ((OVMF_SIZE + OVMF_MAXOFFSET) & ~OVMF_MAXOFFSET))
+#define OVMF_VARS_BEGIN OVMF_BEGIN
+#define OVMF_VARS_END (OVMF_VARS_BEGIN + OVMF_VARS_SIZE)
+#define OVMF_CODE_BEGIN OVMF_VARS_END
#define OVMF_END (OVMF_BEGIN + OVMF_SIZE)
#define LOWCHUNK_BEGIN 0x000F0000
#define LOWCHUNK_SIZE 0x00010000
@@ -96,7 +102,7 @@ static void ovmf_finish_bios_info(void)
static void ovmf_load(const struct bios_config *config)
{
xen_pfn_t mfn;
- uint64_t addr = OVMF_BEGIN;
+ uint64_t addr = OVMF_CODE_BEGIN;

/* Copy low-reset vector portion. */
memcpy((void *) LOWCHUNK_BEGIN, (uint8_t *) config->image
@@ -113,7 +119,7 @@ static void ovmf_load(const struct bios_
}

/* Copy FD. */
- memcpy((void *) OVMF_BEGIN, config->image, OVMF_SIZE);
+ memcpy((void *) OVMF_CODE_BEGIN, config->image + OVMF_VARS_SIZE, OVMF_CODE_SIZE);
}

static void ovmf_acpi_build_tables(void)
65 changes: 65 additions & 0 deletions recipes-extended/xen/files/hvmloader-qemu-q35.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
Index: xen-4.6.4/tools/firmware/hvmloader/pci.c
===================================================================
--- xen-4.6.4.orig/tools/firmware/hvmloader/pci.c
+++ xen-4.6.4/tools/firmware/hvmloader/pci.c
@@ -131,19 +131,23 @@ void pci_setup(void)
if ( s )
mmio_hole_size = strtoll(s, NULL, 0);

- /* Program PCI-ISA bridge with appropriate link routes. */
- isa_irq = 0;
- for ( link = 0; link < 4; link++ )
- {
- do { isa_irq = (isa_irq + 1) & 15;
- } while ( !(PCI_ISA_IRQ_MASK & (1U << isa_irq)) );
- pci_writeb(PCI_ISA_DEVFN, 0x60 + link, isa_irq);
- printf("PCI-ISA link %u routed to IRQ%u\n", link, isa_irq);
- }

- /* Program ELCR to match PCI-wired IRQs. */
- outb(0x4d0, (uint8_t)(PCI_ISA_IRQ_MASK >> 0));
- outb(0x4d1, (uint8_t)(PCI_ISA_IRQ_MASK >> 8));
+ if ((pci_readw(PCI_ISA_DEVFN, PCI_VENDOR_ID) == 0x8086) &&
+ (pci_readw(PCI_ISA_DEVFN, PCI_VENDOR_ID) == 0x7000)) {
Copy link
Contributor

@tklengyel tklengyel Aug 29, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This here should be PCI_DEVICE_ID

+ /* Program PCI-ISA bridge with appropriate link routes. */
+ isa_irq = 0;
+ for ( link = 0; link < 4; link++ )
+ {
+ do { isa_irq = (isa_irq + 1) & 15;
+ } while ( !(PCI_ISA_IRQ_MASK & (1U << isa_irq)) );
+ pci_writeb(PCI_ISA_DEVFN, 0x60 + link, isa_irq);
+ printf("PCI-ISA link %u routed to IRQ%u\n", link, isa_irq);
+ }
+
+ /* Program ELCR to match PCI-wired IRQs. */
+ outb(0x4d0, (uint8_t)(PCI_ISA_IRQ_MASK >> 0));
+ outb(0x4d1, (uint8_t)(PCI_ISA_IRQ_MASK >> 8));
+ }

/* Scan the PCI bus and map resources. */
for ( devfn = 0; devfn < 256; devfn++ )
@@ -154,8 +158,10 @@ void pci_setup(void)
if ( (vendor_id == 0xffff) && (device_id == 0xffff) )
continue;

+#if 0
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again don't #if out replaced code.

ASSERT((devfn != PCI_ISA_DEVFN) ||
((vendor_id == 0x8086) && (device_id == 0x7000)));
+#endif

switch ( class )
{
Index: xen-4.6.4/tools/firmware/hvmloader/acpi/dsdt.asl
===================================================================
--- xen-4.6.4.orig/tools/firmware/hvmloader/acpi/dsdt.asl
+++ xen-4.6.4/tools/firmware/hvmloader/acpi/dsdt.asl
@@ -223,7 +223,8 @@ DefinitionBlock ("DSDT.aml", "DSDT", 2,

Device (ISA)
{
- Name (_ADR, 0x00010000) /* device 1, fn 0 */
+ //Name (_ADR, 0x00010000) /* device 1, fn 0 */
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just like #if: if you are leaving it for understanding, then explain why; otherwise just delete the line.

+ Name (_ADR, 0x001f0000) /* device 1f, fn 0 */

OperationRegion(PIRQ, PCI_Config, 0x60, 0x4)
Scope(\) {
27 changes: 27 additions & 0 deletions recipes-extended/xen/files/libxl-stubdom-nvram.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
Index: xen-4.6.4/tools/libxl/libxl_dm.c
===================================================================
--- xen-4.6.4.orig/tools/libxl/libxl_dm.c
+++ xen-4.6.4/tools/libxl/libxl_dm.c
@@ -1159,11 +1159,17 @@ static int libxl__build_device_model_arg
* For other disks we translate devices 0..3 into
* hd[a-d] and ignore the rest.
*/
- if (strncmp(disks[i].vdev, "sd", 2) == 0)
- drive = libxl__sprintf
- (gc, "file=%s,if=scsi,bus=0,unit=%d,format=%s,cache=writeback",
- pdev_path, disk, format);
- else if (disk < 6 && b_info->u.hvm.hdtype == LIBXL_HDTYPE_AHCI) {
+ if (strncmp(disks[i].vdev, "sd", 2) == 0) {
+ if (b_info->stubdomain_version == LIBXL_STUBDOMAIN_VERSION_LINUX) {
+ drive = libxl__sprintf
+ (gc, "file=%s%c,if=scsi,bus=0,unit=%d,format=%s,cache=writeback",
+ "/dev/xvd", 'a'+disk, disk, format);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any way not to have to hard code the device path? Maybe make it a #define variable?

+ } else {
+ drive = libxl__sprintf
+ (gc, "file=%s,if=scsi,bus=0,unit=%d,format=%s,cache=writeback",
+ pdev_path, disk, format);
+ }
+ } else if (disk < 6 && b_info->u.hvm.hdtype == LIBXL_HDTYPE_AHCI) {
flexarray_vappend(dm_args, "-drive",
GCSPRINTF("file=%s,if=none,id=ahcidisk-%d,format=%s,cache=writeback",
pdev_path, disk, format),
25 changes: 25 additions & 0 deletions recipes-extended/xen/files/ovmf-stubdom.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
Index: xen-4.6.4/tools/libxl/libxl_dm.c
===================================================================
--- xen-4.6.4.orig/tools/libxl/libxl_dm.c
+++ xen-4.6.4/tools/libxl/libxl_dm.c
@@ -1637,6 +1637,7 @@ void libxl__spawn_stub_dm(libxl__egc *eg
libxl__xs_get_dompath(gc, dm_domid)),
"%d", guest_domid);
if (guest_config->b_info.stubdomain_version == LIBXL_STUBDOMAIN_VERSION_LINUX) {
+#if 0 /* LIES */
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this is wrong, then just replace it.

/* qemu-xen is used as a dm in the stubdomain, so we set the bios
* according to this */
libxl__xs_write(gc, XBT_NULL,
@@ -1644,6 +1645,12 @@ void libxl__spawn_stub_dm(libxl__egc *eg
libxl__xs_get_dompath(gc, guest_domid)),
"%s",
libxl_bios_type_to_string(LIBXL_BIOS_TYPE_SEABIOS));
+#else
+ libxl__xs_write(gc, XBT_NULL,
+ libxl__sprintf(gc, "%s/hvmloader/bios",
+ libxl__xs_get_dompath(gc, guest_domid)),
+ "%s", libxl_bios_type_to_string(guest_config->b_info.u.hvm.bios));
+#endif
/* OpenXT: We use legacy roms, which is disabled by default in sebios */
libxl__xs_write(gc, XBT_NULL,
libxl__sprintf(gc, "%s/hvmloader/seabios-legacy-load-roms",
14 changes: 14 additions & 0 deletions recipes-extended/xen/files/ovmf.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
Index: xen-4.6.4/tools/firmware/hvmloader/Makefile
===================================================================
--- xen-4.6.4.orig/tools/firmware/hvmloader/Makefile
+++ xen-4.6.4/tools/firmware/hvmloader/Makefile
@@ -25,6 +25,9 @@ SUBDIRS := acpi
# The HVM loader is started in 32-bit mode at the address below:
LOADADDR = 0x100000

+OVMF_PATH=${STAGING_LIBDIR}/../share/ovmf/OVMF.fd
+CONFIG_OVMF=y
+
# SMBIOS spec requires format mm/dd/yyyy
SMBIOS_REL_DATE ?= $(shell date +%m/%d/%Y)

9 changes: 9 additions & 0 deletions recipes-extended/xen/xen-common.inc
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,17 @@ SRC_URI_append = " \
file://libxl-atapi-pt.patch \
file://libxl-iso-hotswap.patch \
file://tboot-xen-evtlog-support.patch \
file://fix-nesting-1.patch \
file://fix-nesting-2.patch \
file://ovmf.patch \
file://ovmf-stubdom.patch \
file://hvmloader-nvram.patch \
file://libxl-stubdom-nvram.patch \
"

# This patch adds experimental q35 support for hvmloader
# file://hvmloader-qemu-q35.patch

COMPATIBLE_HOST = 'i686-oe-linux|(x86_64.*).*-linux|aarch64.*-linux'

PACKAGECONFIG =+ "xsm"
Expand Down
4 changes: 4 additions & 0 deletions recipes-extended/xen/xen.bb
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,10 @@ do_install() {
-i ${D}${sysconfdir}/init.d/xenstored.${PN}-xenstored-c
}

DEPENDS_append = "\
ovmf \
"

RDEPENDS_${PN}-base_remove = "\
${PN}-blktap \
${PN}-libblktapctl \
Expand Down
13 changes: 13 additions & 0 deletions recipes-openxt/qemu-dm/qemu-dm-2.6.2/ahci-disable-ncq.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
Index: qemu-2.6.2/hw/ide/ahci.c
===================================================================
--- qemu-2.6.2.orig/hw/ide/ahci.c
+++ qemu-2.6.2/hw/ide/ahci.c
@@ -488,7 +488,7 @@ static void ahci_reg_init(AHCIState *s)
s->control_regs.cap = (s->ports - 1) |
(AHCI_NUM_COMMAND_SLOTS << 8) |
(AHCI_SUPPORTED_SPEED_GEN1 << AHCI_SUPPORTED_SPEED) |
- HOST_CAP_NCQ | HOST_CAP_AHCI;
+ /*HOST_CAP_NCQ |*/ HOST_CAP_AHCI;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just delete it and, if you feel it's needed, add a comment that it was removed and why.


s->control_regs.impl = (1 << s->ports) - 1;

Loading