diff --git a/elfloader-tool/include/arch-riscv/sbi.h b/elfloader-tool/include/arch-riscv/sbi.h index 3cf90b03..9c87057d 100644 --- a/elfloader-tool/include/arch-riscv/sbi.h +++ b/elfloader-tool/include/arch-riscv/sbi.h @@ -19,10 +19,10 @@ #define SBI_SHUTDOWN 8 #define SBI_CALL(which, arg0, arg1, arg2) ({ \ - register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0); \ - register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1); \ - register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2); \ - register uintptr_t a7 asm ("a7") = (uintptr_t)(which); \ + register word_t a0 asm ("a0") = (word_t)(arg0); \ + register word_t a1 asm ("a1") = (word_t)(arg1); \ + register word_t a2 asm ("a2") = (word_t)(arg2); \ + register word_t a7 asm ("a7") = (word_t)(which); \ asm volatile ("ecall" \ : "+r" (a0) \ : "r" (a1), "r" (a2), "r" (a7) \ @@ -34,11 +34,11 @@ #define SBI_HSM_HART_START 0 #define SBI_EXT_CALL(extension, which, arg0, arg1, arg2) ({ \ - register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0); \ - register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1); \ - register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2); \ - register uintptr_t a6 asm ("a6") = (uintptr_t)(which); \ - register uintptr_t a7 asm ("a7") = (uintptr_t)(extension); \ + register word_t a0 asm ("a0") = (word_t)(arg0); \ + register word_t a1 asm ("a1") = (word_t)(arg1); \ + register word_t a2 asm ("a2") = (word_t)(arg2); \ + register word_t a6 asm ("a6") = (word_t)(which); \ + register word_t a7 asm ("a7") = (word_t)(extension); \ asm volatile ("ecall" \ : "+r" (a0) \ : "r" (a1), "r" (a2), "r" (a6), "r" (a7) \ @@ -86,34 +86,34 @@ static inline void sbi_clear_ipi(void) SBI_CALL_0(SBI_CLEAR_IPI); } -static inline void sbi_send_ipi(const unsigned long *hart_mask) +static inline void sbi_send_ipi(const word_t *hart_mask) { SBI_CALL_1(SBI_SEND_IPI, hart_mask); } -static inline void sbi_remote_fence_i(const unsigned long *hart_mask) +static inline void sbi_remote_fence_i(const word_t *hart_mask) { SBI_CALL_1(SBI_REMOTE_FENCE_I, 
hart_mask); } -static inline void sbi_remote_sfence_vma(const unsigned long *hart_mask, - UNUSED unsigned long start, - UNUSED unsigned long size) +static inline void sbi_remote_sfence_vma(const word_t *hart_mask, + UNUSED word_t start, + UNUSED word_t size) { SBI_CALL_1(SBI_REMOTE_SFENCE_VMA, hart_mask); } -static inline void sbi_remote_sfence_vma_asid(const unsigned long *hart_mask, - UNUSED unsigned long start, - UNUSED unsigned long size, - UNUSED unsigned long asid) +static inline void sbi_remote_sfence_vma_asid(const word_t *hart_mask, + UNUSED word_t start, + UNUSED word_t size, + UNUSED word_t asid) { SBI_CALL_1(SBI_REMOTE_SFENCE_VMA_ASID, hart_mask); } -static inline void sbi_hart_start(const unsigned long hart_id, - void (*start)(unsigned long), - unsigned long privilege) +static inline void sbi_hart_start(const word_t hart_id, + void (*start)(word_t hart_id, word_t arg), + word_t arg) { - SBI_HSM_CALL(SBI_HSM_HART_START, hart_id, start, privilege); + SBI_HSM_CALL(SBI_HSM_HART_START, hart_id, start, arg); } diff --git a/elfloader-tool/src/arch-riscv/boot.c b/elfloader-tool/src/arch-riscv/boot.c index f485594a..83605408 100644 --- a/elfloader-tool/src/arch-riscv/boot.c +++ b/elfloader-tool/src/arch-riscv/boot.c @@ -57,12 +57,45 @@ unsigned long l2pt[PTES_PER_PT] __attribute__((aligned(4096))); unsigned long l2pt_elf[PTES_PER_PT] __attribute__((aligned(4096))); #endif -char elfloader_stack_alloc[BIT(CONFIG_KERNEL_STACK_BITS)]; +/* Stacks for each core are set up in the assembly startup code. 
*/ +char elfloader_stack[CONFIG_MAX_NUM_NODES * BIT(CONFIG_KERNEL_STACK_BITS)] __attribute__((aligned(4096))); /* first HART will initialise these */ void const *dtb = NULL; size_t dtb_size = 0; +static inline void sfence_vma(void) +{ + asm volatile("sfence.vma" ::: "memory"); +} + +static inline void ifence(void) +{ + asm volatile("fence.i" ::: "memory"); +} + +#if CONFIG_PT_LEVELS == 2 +uint64_t vm_mode = 0x1llu << 31; +#elif CONFIG_PT_LEVELS == 3 +uint64_t vm_mode = 0x8llu << 60; +#elif CONFIG_PT_LEVELS == 4 +uint64_t vm_mode = 0x9llu << 60; +#else +#error "Wrong PT level" +#endif + +static inline void enable_virtual_memory(void) +{ + sfence_vma(); + asm volatile( + "csrw satp, %0\n" + : + : "r"(vm_mode | (uintptr_t)l1pt >> RISCV_PGSHIFT) + : + ); + ifence(); +} + /* * overwrite the default implementation for abort() */ @@ -133,64 +166,70 @@ static int map_kernel_window(struct image_info *kernel_info) return 0; } -#if CONFIG_PT_LEVELS == 2 -uint64_t vm_mode = 0x1llu << 31; -#elif CONFIG_PT_LEVELS == 3 -uint64_t vm_mode = 0x8llu << 60; -#elif CONFIG_PT_LEVELS == 4 -uint64_t vm_mode = 0x9llu << 60; -#else -#error "Wrong PT level" -#endif - -int hsm_exists = 0; +int hsm_exists = 0; /* assembly startup code will initialise this */ #if CONFIG_MAX_NUM_NODES > 1 -extern void secondary_harts(unsigned long); +extern void secondary_harts(word_t hart_id, word_t core_id); int secondary_go = 0; -int next_logical_core_id = 1; +int next_logical_core_id = 1; /* incremented by assembly code */ int mutex = 0; int core_ready[CONFIG_MAX_NUM_NODES] = { 0 }; -static void set_and_wait_for_ready(int hart_id, int core_id) + +static void acquire_multicore_lock(void) +{ + while (__atomic_exchange_n(&mutex, 1, __ATOMIC_ACQUIRE) != 0) { + /* busy waiting loop */ + } +} + +static void release_multicore_lock(void) { - /* Acquire lock to update core ready array */ - while (__atomic_exchange_n(&mutex, 1, __ATOMIC_ACQUIRE) != 0); - printf("Hart ID %d core ID %d\n", hart_id, core_id); - 
core_ready[core_id] = 1; __atomic_store_n(&mutex, 0, __ATOMIC_RELEASE); +} - /* Wait untill all cores are go */ - for (int i = 0; i < CONFIG_MAX_NUM_NODES; i++) { - while (__atomic_load_n(&core_ready[i], __ATOMIC_RELAXED) == 0) ; +static void set_secondary_cores_go(void) +{ + __atomic_store_n(&secondary_go, 1, __ATOMIC_RELEASE); +} + +static void block_until_secondary_cores_go(void) +{ + while (__atomic_load_n(&secondary_go, __ATOMIC_ACQUIRE) == 0) { + /* busy waiting loop */ } } -#endif -static inline void sfence_vma(void) +static void mark_core_ready(int core_id) { - asm volatile("sfence.vma" ::: "memory"); + __atomic_store_n(&core_ready[core_id], 1, __ATOMIC_RELAXED); /* polled without the lock by is_core_ready(), so the store must be atomic too */ } -static inline void ifence(void) +static int is_core_ready(int core_id) { - asm volatile("fence.i" ::: "memory"); + return (0 != __atomic_load_n(&core_ready[core_id], __ATOMIC_RELAXED)); } -static inline void enable_virtual_memory(void) +static void set_and_wait_for_ready(word_t hart_id, word_t core_id) { - sfence_vma(); - asm volatile( - "csrw satp, %0\n" - : - : "r"(vm_mode | (uintptr_t)l1pt >> RISCV_PGSHIFT) - : - ); - ifence(); + /* Acquire lock to update core ready array */ + acquire_multicore_lock(); + printf("Hart ID %"PRIu_word" core ID %"PRIu_word"\n", hart_id, core_id); + mark_core_ready(core_id); + release_multicore_lock(); + + /* Wait until all cores are go */ + for (int i = 0; i < CONFIG_MAX_NUM_NODES; i++) { + while (!is_core_ready(i)) { + /* busy waiting loop */ + } + } } -static int run_elfloader(UNUSED int hart_id, void *bootloader_dtb) +#endif /* CONFIG_MAX_NUM_NODES > 1 */ + +static int run_elfloader(UNUSED word_t hart_id, void *bootloader_dtb) { int ret; @@ -216,24 +255,21 @@ static int run_elfloader(UNUSED int hart_id, void *bootloader_dtb) } #if CONFIG_MAX_NUM_NODES > 1 - while (__atomic_exchange_n(&mutex, 1, __ATOMIC_ACQUIRE) != 0); - printf("Main entry hart_id:%d\n", hart_id); - __atomic_store_n(&mutex, 0, __ATOMIC_RELEASE); - + acquire_multicore_lock(); + printf("Main entry hart_id:%"PRIu_word"\n", 
hart_id); + release_multicore_lock(); /* Unleash secondary cores */ - __atomic_store_n(&secondary_go, 1, __ATOMIC_RELEASE); - + set_secondary_cores_go(); /* Start all cores */ - int i = 0; + word_t i = 0; while (i < CONFIG_MAX_NUM_NODES && hsm_exists) { i++; if (i != hart_id) { sbi_hart_start(i, secondary_harts, i); } } - set_and_wait_for_ready(hart_id, 0); -#endif +#endif /* CONFIG_MAX_NUM_NODES > 1 */ printf("Enabling MMU and paging\n"); enable_virtual_memory(); @@ -249,7 +285,7 @@ static int run_elfloader(UNUSED int hart_id, void *bootloader_dtb) , hart_id, 0 -#endif +#endif /* CONFIG_MAX_NUM_NODES > 1 */ ); /* We should never get here. */ @@ -259,19 +295,15 @@ static int run_elfloader(UNUSED int hart_id, void *bootloader_dtb) #if CONFIG_MAX_NUM_NODES > 1 -void secondary_entry(int hart_id, int core_id) +void secondary_entry(word_t hart_id, word_t core_id) { - while (__atomic_load_n(&secondary_go, __ATOMIC_ACQUIRE) == 0) ; - - while (__atomic_exchange_n(&mutex, 1, __ATOMIC_ACQUIRE) != 0); - printf("Secondary entry hart_id:%d core_id:%d\n", hart_id, core_id); - __atomic_store_n(&mutex, 0, __ATOMIC_RELEASE); - + block_until_secondary_cores_go(); + acquire_multicore_lock(); + printf("Secondary entry hart_id:%"PRIu_word" core_id:%"PRIu_word"\n", + hart_id, core_id); + release_multicore_lock(); set_and_wait_for_ready(hart_id, core_id); - enable_virtual_memory(); - - /* If adding or modifying these parameters you will need to update the registers in head.S */ ((init_riscv_kernel_t)kernel_info.virt_entry)(user_info.phys_region_start, @@ -285,13 +317,13 @@ void secondary_entry(int hart_id, int core_id) ); } -#endif +#endif /* CONFIG_MAX_NUM_NODES > 1 */ -void main(int hart_id, void *bootloader_dtb) +void main(word_t hart_id, void *bootloader_dtb) { /* Printing uses SBI, so there is no need to initialize any UART. 
*/ - printf("ELF-loader started on (HART %d) (NODES %d)\n", - hart_id, CONFIG_MAX_NUM_NODES); + printf("ELF-loader started on (HART %"PRIu_word") (NODES %u)\n", + hart_id, (unsigned int)CONFIG_MAX_NUM_NODES); printf(" paddr=[%p..%p]\n", _text, _end - 1); diff --git a/elfloader-tool/src/arch-riscv/crt0.S b/elfloader-tool/src/arch-riscv/crt0.S index 3447cc38..2814abb6 100644 --- a/elfloader-tool/src/arch-riscv/crt0.S +++ b/elfloader-tool/src/arch-riscv/crt0.S @@ -9,8 +9,11 @@ .extern main .extern __global_pointer$ -.extern elfloader_stack_alloc +.extern elfloader_stack .extern hsm_exists +#if CONFIG_MAX_NUM_NODES > 1 +.extern next_logical_core_id +#endif #define BIT(n) (1 << (n)) @@ -56,7 +59,7 @@ _start: mv s2, a1 /* preserve a1 (dtb) in s2 */ /* Attach the stack to sp before calling any C functions */ - la sp, (elfloader_stack_alloc + BIT(12)) + la sp, (elfloader_stack + BIT(CONFIG_KERNEL_STACK_BITS)) #ifdef CONFIG_IMAGE_BINARY /* Clear the BSS before we get to do anything more specific */ @@ -104,37 +107,28 @@ hsm_switch_hart: mv a0, s0 /* restore a0 to hold hart ID passed by OpenSBI */ j secondary_harts - -_start1: /* a0 must hold current hard ID passed by bootloader */ - /* a1 must hold dtb address passed by bootloader */ +/*----------------------------------------------------------------------------*/ +_start1: +/* This is basically an asm wrapper to jump to the C code at main(). The + * registers are already set up with the parameters for the C code: + * a0 holds current hart ID passed by bootloader + * a1 holds dtb address passed by bootloader + * All that is left to be done here is setting up the registers gp and sp to + * have a proper C environment. The hart we are running on now could be a + * different HART to the one that we have been on in _start. The original hart + * we came from will get a different stack in secondary_harts. 
+ */ .option push .option norelax 1:auipc gp, %pcrel_hi(__global_pointer$) addi gp, gp, %pcrel_lo(1b) .option pop - - /* Attach the stack to sp before calling any C functions */ - /* This HART may be a different HART to the one that started at _start - * If we've switched HARTs then the other HART will get a different stack - * region in secondary_harts. */ - la sp, (elfloader_stack_alloc + BIT(12)) - /* The C code expects the registers to be set up as: - * a0 = hart id - * a1 = dtb - */ + la sp, (elfloader_stack + BIT(CONFIG_KERNEL_STACK_BITS)) + /* Jump via a register, as this can cover a bigger range. */ la s0, main jr s0 -#if CONFIG_MAX_NUM_NODES > 1 -.extern next_logical_core_id -.data -bootstack_secondary_cores: -.align 12 -.space 4096 * (CONFIG_MAX_NUM_NODES - 1) -#endif - -.text - +/*----------------------------------------------------------------------------*/ .global secondary_harts secondary_harts: @@ -147,14 +141,16 @@ secondary_harts: #if CONFIG_MAX_NUM_NODES > 1 la a1, next_logical_core_id li t2, 1 - amoadd.w t0, t2, (a1) - /* now a1 has the logical core id */ + /* atomically increment next_logical_core_id by one; afterwards a1 holds the value + * from before the update - which is our logical core ID. + */ + amoadd.w a1, t2, (a1) li t2, CONFIG_MAX_NUM_NODES - bge t0, t2, hsm_suspend_hart - - mv a1, t0 - slli t0, t0, 12 - la sp, bootstack_secondary_cores + bge a1, t2, hsm_suspend_hart + /* set up the hart-specific stack pointer */ + la sp, elfloader_stack + addi t0, a1, 1 /* add one because sp must point to the end of this hart's stack slot */ + slli t0, t0, CONFIG_KERNEL_STACK_BITS /* t0 = t0 * BIT(CONFIG_KERNEL_STACK_BITS) */ add sp, sp, t0 la s0, secondary_entry jr s0