From 691cbe5ba5f77f3759f3491b971a96f9998dcd9d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 13 Apr 2020 18:21:31 +0200 Subject: [PATCH 1/5] ARM: decompressor: move headroom variable out of LC0 Before breaking up LC0 into different pieces, move out the variable that is already place-relative (given that it subtracts 'restart' in the expression) and so its value does not need to be added to the runtime address of the LC0 symbol itself. Signed-off-by: Ard Biesheuvel Reviewed-by: Geert Uytterhoeven Reviewed-by: Nicolas Pitre --- arch/arm/boot/compressed/head.S | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index cabdd8f4a2482e..42b8d67beab61f 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -287,7 +287,7 @@ not_angel: */ mov r0, pc cmp r0, r4 - ldrcc r0, LC0+28 + ldrcc r0, .Lheadroom addcc r0, r0, pc cmpcc r4, r0 orrcc r4, r4, #1 @ remember we skipped cache_on @@ -664,9 +664,11 @@ LC0: .word LC0 @ r1 .word _got_start @ r11 .word _got_end @ ip .word .L_user_stack_end @ sp - .word _end - restart + 16384 + 1024*1024 .size LC0, . - LC0 +.Lheadroom: + .word _end - restart + 16384 + 1024*1024 + .Linflated_image_size_offset: .long (input_data_end - 4) - . From 161e04a5bae58a65d2b13642845f250888a845a1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 13 Apr 2020 18:21:32 +0200 Subject: [PATCH 2/5] ARM: decompressor: split off _edata and stack base into separate object In preparation of moving the handling of the LC0 object to a later stage in the decompressor startup code, move out _edata and the initial value of the stack pointer, which are needed earlier than the remaining contents of LC0. Signed-off-by: Ard Biesheuvel Reviewed-by: Geert Uytterhoeven Reviewed-by: Nicolas Pitre --- arch/arm/boot/compressed/head.S | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 42b8d67beab61f..5d712e2c000111 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -293,22 +293,20 @@ not_angel: orrcc r4, r4, #1 @ remember we skipped cache_on blcs cache_on -restart: adr r0, LC0 - ldmia r0, {r1, r2, r3, r6, r11, r12} - ldr sp, [r0, #24] +restart: adr r0, LC1 + ldr sp, [r0] + ldr r6, [r0, #4] + add sp, sp, r0 + add r6, r6, r0 - /* - * We might be running at a different address. We need - * to fix up various pointers. - */ + adr r0, LC0 + ldmia r0, {r1, r2, r3, r11, r12} sub r0, r0, r1 @ calculate the delta offset - add r6, r6, r0 @ _edata get_inflated_image_size r9, r10, lr #ifndef CONFIG_ZBOOT_ROM /* malloc space is above the relocated stack (64k max) */ - add sp, sp, r0 add r10, sp, #0x10000 #else /* @@ -660,12 +658,15 @@ not_relocated: mov r0, #0 LC0: .word LC0 @ r1 .word __bss_start @ r2 .word _end @ r3 - .word _edata @ r6 .word _got_start @ r11 .word _got_end @ ip - .word .L_user_stack_end @ sp .size LC0, . - LC0 + .type LC1, #object +LC1: .word .L_user_stack_end - LC1 @ sp + .word _edata - LC1 @ r6 + .size LC1, . - LC1 + .Lheadroom: .word _end - restart + 16384 + 1024*1024 From f1f012b033e6651cd5c5b43d1722976cf6baf973 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 13 Apr 2020 18:21:33 +0200 Subject: [PATCH 3/5] ARM: decompressor: defer loading of the contents of the LC0 structure The remaining contents of LC0 are only used after the point in the decompressor startup code where we enter via 'wont_overwrite'. So move the loading of the LC0 structure after it. This will allow us to jump to wont_overwrite directly from the EFI stub, and execute the decompressor in place at the offset it was loaded by the UEFI firmware. Signed-off-by: Ard Biesheuvel Reviewed-by: Geert Uytterhoeven Reviewed-by: Nicolas Pitre --- arch/arm/boot/compressed/head.S | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 5d712e2c000111..ce442ec5028a0f 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -299,10 +299,6 @@ restart: adr r0, LC1 add sp, sp, r0 add r6, r6, r0 - adr r0, LC0 - ldmia r0, {r1, r2, r3, r11, r12} - sub r0, r0, r1 @ calculate the delta offset - get_inflated_image_size r9, r10, lr #ifndef CONFIG_ZBOOT_ROM @@ -320,9 +316,6 @@ restart: adr r0, LC1 mov r5, #0 @ init dtb size to 0 #ifdef CONFIG_ARM_APPENDED_DTB /* - * r0 = delta - * r2 = BSS start - * r3 = BSS end * r4 = final kernel address (possibly with LSB set) * r5 = appended dtb size (still unknown) * r6 = _edata @@ -330,8 +323,6 @@ restart: adr r0, LC1 * r8 = atags/device tree pointer * r9 = size of decompressed image * r10 = end of this image, including bss/stack/malloc space if non XIP - * r11 = GOT start - * r12 = GOT end * sp = stack pointer * * if there are device trees (dtb) appended to zImage, advance r10 so that the @@ -379,7 +370,6 @@ restart: adr r0, LC1 /* temporarily relocate the stack past the DTB work space */ add sp, sp, r5 - stmfd sp!, {r0-r3, ip, lr} mov r0, r8 mov r1, r6 mov r2, r5 @@ -398,7 +388,6 @@ restart: adr r0, LC1 mov r2, r5 bleq atags_to_fdt - ldmfd sp!, {r0-r3, ip, lr} sub sp, sp, r5 #endif @@ -535,6 +524,10 @@ dtb_check_done: mov pc, r0 wont_overwrite: + adr r0, LC0 + ldmia r0, {r1, r2, r3, r11, r12} + sub r0, r0, r1 @ calculate the delta offset + /* * If delta is zero, we are running at the address we were linked at. * r0 = delta From 35d57d1215ed0da3349180275b845f0c2ee62d08 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 13 Apr 2020 18:21:34 +0200 Subject: [PATCH 4/5] ARM: decompressor: move GOT into .data for EFI enabled builds We will be running the decompressor in place after a future patch, instead of copying it around first. This means we no longer have to disable and re-enable the MMU and caches either. However, this means we will be loaded with the restricted permissions set by the UEFI firmware, which means that we have to move the GOT table into the data section in order for the contents to be writable by the code itself. Signed-off-by: Ard Biesheuvel Reviewed-by: Nicolas Pitre --- arch/arm/boot/compressed/vmlinux.lds.S | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm/boot/compressed/vmlinux.lds.S b/arch/arm/boot/compressed/vmlinux.lds.S index b247f399de711b..d0619ec057052d 100644 --- a/arch/arm/boot/compressed/vmlinux.lds.S +++ b/arch/arm/boot/compressed/vmlinux.lds.S @@ -63,9 +63,11 @@ SECTIONS _etext = .; .got.plt : { *(.got.plt) } +#ifndef CONFIG_EFI_STUB _got_start = .; .got : { *(.got) } _got_end = .; +#endif /* ensure the zImage file size is always a multiple of 64 bits */ /* (without a dummy byte, ld just ignores the empty section) */ @@ -74,6 +76,9 @@ SECTIONS #ifdef CONFIG_EFI_STUB .data : ALIGN(4096) { __pecoff_data_start = .; + _got_start = .; + *(.got) + _got_end = .; /* * The EFI stub always executes from RAM, and runs strictly before the * decompressor, so we can make an exception for its r/w data, and keep it From d0f9ca9be11f25ef4151195eab7ea36d136084f6 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 13 Apr 2020 18:21:35 +0200 Subject: [PATCH 5/5] ARM: decompressor: run decompressor in place if loaded via UEFI The decompressor can load from anywhere in memory, and the only reason the EFI stub code relocates it is to ensure it appears within the first 128 MiB of memory, so that the uncompressed kernel ends up at the right offset in memory. We can short circuit this, and simply jump into the decompressor startup code at the point where it knows where the base of memory lives. This also means there is no need to disable the MMU and caches, create new page tables and re-enable them. Signed-off-by: Ard Biesheuvel Reviewed-by: Nicolas Pitre --- arch/arm/boot/compressed/head.S | 39 +++++++------------- drivers/firmware/efi/libstub/arm32-stub.c | 45 +++-------------------- 2 files changed, 20 insertions(+), 64 deletions(-) diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index ce442ec5028a0f..c79db44ba1284b 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -1430,37 +1430,26 @@ reloc_code_end: #ifdef CONFIG_EFI_STUB ENTRY(efi_enter_kernel) - mov r7, r0 @ preserve image base - mov r4, r1 @ preserve DT pointer + mov r4, r0 @ preserve image base + mov r8, r1 @ preserve DT pointer - mov r0, r4 @ DT start - add r1, r4, r2 @ DT end - bl cache_clean_flush + mrc p15, 0, r0, c1, c0, 0 @ read SCTLR + tst r0, #0x1 @ MMU enabled? + orreq r4, r4, #1 @ set LSB if not - mov r0, r7 @ relocated zImage - ldr r1, =_edata @ size of zImage - add r1, r1, r0 @ end of zImage + mov r0, r8 @ DT start + add r1, r8, r2 @ DT end bl cache_clean_flush - @ The PE/COFF loader might not have cleaned the code we are - @ running beyond the PoU, and so calling cache_off below from - @ inside the PE/COFF loader allocated region is unsafe unless - @ we explicitly clean it to the PoC. - adr r0, call_cache_fn @ region of code we will - adr r1, 0f @ run with MMU off - bl cache_clean_flush - bl cache_off + adr r0, 0f @ switch to our stack + ldr sp, [r0] + add sp, sp, r0 - @ Set parameters for booting zImage according to boot protocol - @ put FDT address in r2, it was returned by efi_entry() - @ r1 is the machine type, and r0 needs to be 0 - mov r0, #0 - mov r1, #0xFFFFFFFF - mov r2, r4 - add r7, r7, #(__efi_start - start) - mov pc, r7 @ no mode switch + mov r5, #0 @ appended DTB size + mov r7, #0xFFFFFFFF @ machine ID + b wont_overwrite ENDPROC(efi_enter_kernel) -0: +0: .long .L_user_stack_end - . #endif .align diff --git a/drivers/firmware/efi/libstub/arm32-stub.c b/drivers/firmware/efi/libstub/arm32-stub.c index 7826553af2ba2c..0050d811bf20f7 100644 --- a/drivers/firmware/efi/libstub/arm32-stub.c +++ b/drivers/firmware/efi/libstub/arm32-stub.c @@ -199,14 +199,8 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, unsigned long kernel_base; efi_status_t status; - /* - * Verify that the DRAM base address is compatible with the ARM - * boot protocol, which determines the base of DRAM by masking - * off the low 27 bits of the address at which the zImage is - * loaded. These assumptions are made by the decompressor, - * before any memory map is available. - */ - kernel_base = round_up(dram_base, SZ_128M); + /* use a 16 MiB aligned base for the decompressed kernel */ + kernel_base = round_up(dram_base, SZ_16M) + TEXT_OFFSET; /* * Note that some platforms (notably, the Raspberry Pi 2) put @@ -215,41 +209,14 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, * base of the kernel image is only partially used at the moment. * (Up to 5 pages are used for the swapper page tables) */ - kernel_base += TEXT_OFFSET - 5 * PAGE_SIZE; - - status = reserve_kernel_base(kernel_base, reserve_addr, reserve_size); + status = reserve_kernel_base(kernel_base - 5 * PAGE_SIZE, reserve_addr, + reserve_size); if (status != EFI_SUCCESS) { pr_efi_err("Unable to allocate memory for uncompressed kernel.\n"); return status; } - /* - * Relocate the zImage, so that it appears in the lowest 128 MB - * memory window. - */ - *image_addr = (unsigned long)image->image_base; - *image_size = image->image_size; - status = efi_relocate_kernel(image_addr, *image_size, *image_size, - kernel_base + MAX_UNCOMP_KERNEL_SIZE, 0, 0); - if (status != EFI_SUCCESS) { - pr_efi_err("Failed to relocate kernel.\n"); - efi_free(*reserve_size, *reserve_addr); - *reserve_size = 0; - return status; - } - - /* - * Check to see if we were able to allocate memory low enough - * in memory. The kernel determines the base of DRAM from the - * address at which the zImage is loaded. - */ - if (*image_addr + *image_size > dram_base + ZIMAGE_OFFSET_LIMIT) { - pr_efi_err("Failed to relocate kernel, no low memory available.\n"); - efi_free(*reserve_size, *reserve_addr); - *reserve_size = 0; - efi_free(*image_size, *image_addr); - *image_size = 0; - return EFI_LOAD_ERROR; - } + *image_addr = kernel_base; + *image_size = 0; return EFI_SUCCESS; }