Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow elfloader to put kernel images directly after firmware on RISCV #135

Draft
wants to merge 6 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions cmake-tool/helpers/application_settings.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,6 @@ function(ApplyData61ElfLoaderSettings kernel_platform kernel_sel4_arch)
if(KernelPlatformZynqmp AND KernelSel4ArchAarch32)
set(IMAGE_START_ADDR 0x8000000 CACHE INTERNAL "" FORCE)
endif()
if(KernelPlatformSpike AND KernelSel4ArchRiscV32)
set(IMAGE_START_ADDR 0x80400000 CACHE INTERNAL "" FORCE)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this can be removed because of the DTS overlay change in another PR?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea, it was being ignored by opensbi before and wasn't having any effect.

endif()
endfunction()

function(ApplyCommonSimulationSettings kernel_sel4_arch)
Expand Down
2 changes: 2 additions & 0 deletions elfloader-tool/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,8 @@ if(DEFINED KernelDTBPath)
set_property(SOURCE src/drivers/driver.c PROPERTY OBJECT_DEPENDS ${DEVICES_GEN_H})
endif()

set_property(SOURCE src/common.c PROPERTY OBJECT_DEPENDS ${IMAGE_START_ADDR_H})
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this really necessary? With this PR #include <image_start_addr.h> is added to common.c, so it should be part of the normal dependency generation.


# Generate linker script
separate_arguments(c_arguments NATIVE_COMMAND "${CMAKE_C_FLAGS}")
# Add extra compilation flags required for clang
Expand Down
72 changes: 3 additions & 69 deletions elfloader-tool/src/arch-arm/64/crt0.S
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@

#include <autoconf.h>
#include <elfloader/gen_config.h>
#ifdef CONFIG_IMAGE_BINARY
#include <image_start_addr.h>
#endif

#include <assembler.h>

Expand All @@ -26,12 +23,13 @@ BEGIN_FUNC(_start)
* Binary images may not be loaded in the correct location.
* Try and move ourselves so we're in the right place.
*/
mov x0, #0 /* Arg 0 of fixup_image_base is NULL */
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't there a possibility on ARM also that we get passed a DTB here, and thus we have to check that it's not overwritten?

bl fixup_image_base
mov x2, x0
/* restore original arguments for next step */
ldp x0, x1, [sp, #-16]!
/* fixup_image_base returns 1 if no need to move */
cmp x2, #1
/* fixup_image_base returns 0 if no need to move */
cmp x2, #0
beq 1f
/* otherwise, jump to the start of the new elfloader */

Expand All @@ -41,70 +39,6 @@ BEGIN_FUNC(_start)
b main
END_FUNC(_start)

#ifdef CONFIG_IMAGE_BINARY
BEGIN_FUNC(fixup_image_base)
stp x29, x30, [sp, #-16]!
mov x29, sp
ldr x0, =IMAGE_START_ADDR
axel-h marked this conversation as resolved.
Show resolved Hide resolved
adr x1, _start
cmp x0, x1
beq image_ok

adrp x2, _end
add x2, x2, #:lo12:_end
sub x2, x2, x1

/* sanity check: don't want to overwrite ourselves! we assume
* everything between _start and _archive_start is important
* (i.e. code that might be run while relocating)
* but allow overlap for things after _archive_start.
*/
adrp x3, _archive_start
add x3, x3, #:lo12:_archive_start

add x4, x0, x2 /* Dest end */

/* check: if (end < archive_start && end >= _start) { abort } */
cmp x4, x3
bge 1f

cmp x4, x1
blt 1f

b cant_reloc

1:
/* check: if (dest < archive_start && dest >= _start) { abort } */
cmp x0, x3
bge 2f

cmp x0, x1
blt 2f

cant_reloc:
b abort

2:
/* x0 = desired image base */
/* x1 = current image space */
/* x2 = image size */
bl memmove
/* x0 = dest, save it to a callee-saved register while we invalidate icache */
mov x19, x0
bl flush_dcache
bl invalidate_icache
mov x0, x19
b 1f

image_ok:
/* already in the right place, just keep booting */
mov x0, #1
1:
ldp x29, x30, [sp], #16
ret
END_FUNC(fixup_image_base)
#endif

/* Move the elf loader out of the kernel's way */
BEGIN_FUNC(finish_relocation)
/*
Expand Down
26 changes: 10 additions & 16 deletions elfloader-tool/src/arch-riscv/boot.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,11 @@
#define PT_LEVEL_2 2

#define PT_LEVEL_1_BITS 30
#if __riscv_xlen == 32
#define PT_LEVEL_2_BITS 22
axel-h marked this conversation as resolved.
Show resolved Hide resolved
#else
#define PT_LEVEL_2_BITS 21
#endif

#define PTE_TYPE_TABLE 0x00
#define PTE_TYPE_SRWX 0xCE
Expand All @@ -46,8 +50,6 @@

#define GET_PT_INDEX(addr, n) (((addr) >> (((PT_INDEX_BITS) * ((CONFIG_PT_LEVELS) - (n))) + RISCV_PGSHIFT)) % PTES_PER_PT)

#define VIRT_PHYS_ALIGNED(virt, phys, level_bits) (IS_ALIGNED((virt), (level_bits)) && IS_ALIGNED((phys), (level_bits)))

struct image_info kernel_info;
struct image_info user_info;

Expand All @@ -57,7 +59,10 @@ unsigned long l2pt[PTES_PER_PT] __attribute__((aligned(4096)));
unsigned long l2pt_elf[PTES_PER_PT] __attribute__((aligned(4096)));
#endif

char elfloader_stack_alloc[BIT(CONFIG_KERNEL_STACK_BITS)];
/* This stack cannot go in the .bss section because it's already in use when the
* .bss section is zeroed.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder, don't we have the same problem on ARM also then? Seem implementing clear_bss in assembly, so it does not use a stack is the cleanest way to avoid running into issues here.

*/
char elfloader_stack_alloc[BIT(CONFIG_KERNEL_STACK_BITS)] __attribute__((section(".data"))) ;
kent-mcleod marked this conversation as resolved.
Show resolved Hide resolved

/* first HART will initialise these */
void const *dtb = NULL;
Expand Down Expand Up @@ -89,11 +94,6 @@ static int map_kernel_window(struct image_info *kernel_info)

/* Map the elfloader into the new address space */

if (!IS_ALIGNED((uintptr_t)_text, PT_LEVEL_2_BITS)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could you explain why the check existed previously and why it can be removed a bit?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because the kernel also could only handle setting up the kernel mappings if the image was on a page boundary and because the elfloader was always placed on a page boundary by the bbl/opensbi implementation. The rest of map_kernel_window was specialized to these assumptions and so had the checks to catch when there was divergence. Now that divergence can be handled by the code, the checks should be able to be removed.

printf("ERROR: ELF Loader not properly aligned\n");
return -1;
}

index = GET_PT_INDEX((uintptr_t)_text, PT_LEVEL_1);

#if __riscv_xlen == 32
Expand All @@ -105,18 +105,12 @@ static int map_kernel_window(struct image_info *kernel_info)
#endif

for (unsigned int page = 0; index < PTES_PER_PT; index++, page++) {
lpt[index] = PTE_CREATE_LEAF((uintptr_t)_text +
lpt[index] = PTE_CREATE_LEAF(ROUND_DOWN((uintptr_t)_text, PT_LEVEL_2_BITS) +
(page << PT_LEVEL_2_BITS));
}

/* Map the kernel into the new address space */

if (!VIRT_PHYS_ALIGNED(kernel_info->virt_region_start,
kernel_info->phys_region_start, PT_LEVEL_2_BITS)) {
printf("ERROR: Kernel not properly aligned\n");
return -1;
}

index = GET_PT_INDEX(kernel_info->virt_region_start, PT_LEVEL_1);

#if __riscv_xlen == 64
Expand All @@ -126,7 +120,7 @@ static int map_kernel_window(struct image_info *kernel_info)
#endif

for (unsigned int page = 0; index < PTES_PER_PT; index++, page++) {
lpt[index] = PTE_CREATE_LEAF(kernel_info->phys_region_start +
lpt[index] = PTE_CREATE_LEAF(ROUND_DOWN(kernel_info->phys_region_start, PT_LEVEL_2_BITS) +
(page << PT_LEVEL_2_BITS));
}

Expand Down
18 changes: 16 additions & 2 deletions elfloader-tool/src/arch-riscv/crt0.S
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,24 @@ _start:
/* Attach the stack to sp before calling any C functions */
la sp, (elfloader_stack_alloc + BIT(12))
axel-h marked this conversation as resolved.
Show resolved Hide resolved

#ifdef CONFIG_IMAGE_BINARY
/*
* Binary images may not be loaded in the correct location.
* Try and move ourselves so we're in the right place.
*/
mv a0, a1 /* Pass dtb as first argument */
jal fixup_image_base
/* fixup_image_base returns 0 if no need to move */
beqz a0, 1f

/* otherwise, restore args and jump to the start of the new elfloader */
mv a2, a0
mv a0, s0
mv a1, s2
jr a2

/* Clear the BSS before we get to do anything more specific */
1:
jal clear_bss
#endif

/* Check if the Heart State Management (HSM) extension exists, so it can be
* used to switch harts if we are not running on hart CONFIG_FIRST_HART_ID.
Expand Down
36 changes: 6 additions & 30 deletions elfloader-tool/src/arch-riscv/linker.lds
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,6 @@
* SPDX-License-Identifier: GPL-2.0-only
*/

/*
* NOTE: For RISC-V 32-bit, having the .rodata section before the .data section causes the elfloader to
* freeze up. Thus, for 32-bit we move the .rodata section to after the .bss section as previously
* before some refactoring. The 64-bit version does not seem to be affect by this issue and uses thus
* uses the new layout.
*/

#include <autoconf.h>
#include <elfloader/gen_config.h>
#include "image_start_addr.h"
Expand All @@ -29,7 +22,6 @@ SECTIONS
{
*(.text)
}
#ifdef CONFIG_ARCH_RISCV64
. = ALIGN(16);
.rodata :
{
Expand All @@ -40,12 +32,7 @@ SECTIONS
/*
* ld crashes when we add this here: *(_driver_list)
*/
. = ALIGN(16);
_archive_start = .;
*(._archive_cpio)
_archive_end = .;
}
#endif
. = ALIGN(16);
.data :
{
Expand All @@ -55,6 +42,12 @@ SECTIONS
*(.data.*)
}
. = ALIGN(16);
._archive_cpio : {
_archive_start = .;
*(._archive_cpio)
_archive_end = .;
}
. = ALIGN(16);
.bss :
{
_bss = .;
Expand All @@ -63,22 +56,5 @@ SECTIONS
*(.bss.*)
_bss_end = .;
}
#ifdef CONFIG_ARCH_RISCV32
. = ALIGN(16);
.rodata :
{
*(.srodata*)
. = ALIGN(16);
*(.rodata)
*(.rodata.*)
/*
* ld crashes when we add this here: *(_driver_list)
*/
. = ALIGN(16);
_archive_start = .;
*(._archive_cpio)
_archive_end = .;
}
#endif
_end = .;
}
69 changes: 69 additions & 0 deletions elfloader-tool/src/common.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,13 @@
#include <platform_info.h> // this provides memory_region
#endif

#include <image_start_addr.h>
#include <abort.h>

extern char _bss[];
extern char _bss_end[];
extern char _start[];
extern char _end[];

/*
* Clear the BSS segment
Expand Down Expand Up @@ -618,3 +623,67 @@ WEAK void platform_init(void)
{
/* nothing by default */
}

#ifdef CONFIG_ARCH_ARM
extern void flush_dcache(void);
extern void invalidate_icache(void);
#endif

/*
* Moves the elfloader to its expected start address in memory.
* This is for when the elfloader needs to be in a specific location
* to ensure that it doesn't occupy memory that it needs to load the
* kernel and user applications into.
*/
char *fixup_image_base(char const *fdt)
{
char *load_base = _start;
char *target_base = (char *) IMAGE_START_ADDR;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
char *target_base = (char *) IMAGE_START_ADDR;
char *target_base = (char *)IMAGE_START_ADDR;

if (load_base == target_base) {
/* already in the right place, just keep booting */
return (char *)0;
}

/* Check that the current image location doesn't overlap with the
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we could copy backwards if they overlap, just not sure if that is necessary.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually the memmove implementation in the elfloader can handle this. I originally thought that it was just the same implementation of memcpy. I can fix this up to handle overlap apart from the code segment apart so that it's like the original assembly (which I originally thought was not properly handling overlap).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't the issue when there is overlapping, there is chance that the code that is currently executing is not overwritten and thus the result might be undefined? So this must be detected and avoided. Same for overwriting the stack/global in bss

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've fixed this up to only check for collision up until _archive_start.

* destination location. */
size_t image_size = _end - _start;
size_t code_size = _archive_start - _start;
char *target_end = target_base + image_size;
if (image_size < code_size) {
/* This shouldn't happen as image size must be greater than
* code size. This allows the region check below to only consider
* if the smaller region is inside the larger region. */
abort();
}
if ((_start >= target_base && _start < target_end) ||
(_archive_start >= target_base && _archive_start < target_end)) {
abort();
}

/* If a fdt was passed in from the previous boot stage, also
* check that it isn't overwritten.
*/
if (fdt) {
size_t dtb_size = fdt_size(fdt);
const char *fdt_end = fdt + dtb_size;
if (image_size < dtb_size) {
/* Assume the dtb is smaller than the images size to
* simplify region check */
abort();
}
if ((fdt >= target_base && fdt < target_end) ||
(fdt_end >= target_base && fdt_end < target_end)) {
abort();
}
}


/* Perform the move and clean/invalidate caches if necessary */
char *ret = memmove(target_base, load_base, image_size);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Drop ret and use target_base instead, so it's easier to understand what this does. We could assert ro abort if memmove() does not return target_base, because that indicated something is terribly broken.

#ifdef CONFIG_ARCH_ARM
flush_dcache();
invalidate_icache();
#endif
return ret;

}