diff --git a/arch/arm64/arch.c b/arch/arm64/arch.c
index fe670cdb9..54bc14bdf 100644
--- a/arch/arm64/arch.c
+++ b/arch/arm64/arch.c
@@ -31,7 +31,9 @@
 #include
 #include
 #include
+#include
 #include
+#include
 
 #define LOCAL_TRACE 0
 
@@ -41,6 +43,8 @@ static spin_lock_t arm_boot_cpu_lock = 1;
 static volatile int secondaries_to_init = 0;
 #endif
 
+#define SECTION_SIZE (1024 * 1024)
+
 static void arm64_cpu_early_init(void)
 {
     /* set the vector base */
@@ -93,7 +97,76 @@ void arch_idle(void)
 
 void arch_chain_load(void *entry, ulong arg0, ulong arg1, ulong arg2, ulong arg3)
 {
-    PANIC_UNIMPLEMENTED;
+    int ret;
+    LTRACEF("entry %p, args 0x%lx 0x%lx 0x%lx 0x%lx\n", entry, arg0, arg1, arg2, arg3);
+
+    /* we are going to shut down the system, start by disabling interrupts */
+    arch_disable_ints();
+
+    /* give target and platform a chance to put hardware into a suitable
+     * state for chain loading.
+     */
+    target_quiesce();
+    platform_quiesce();
+
+    paddr_t entry_pa;
+    paddr_t loader_pa;
+
+#if WITH_KERNEL_VM
+    /* get the physical address of the entry point we're going to branch to */
+    entry_pa = vaddr_to_paddr((addr_t)entry);
+    if (entry_pa == 0) {
+        panic("error translating entry physical address\n");
+    }
+
+    /* add the low bits of the virtual address back */
+    entry_pa |= ((addr_t)entry & 0xfff);
+
+    LTRACEF("entry pa 0x%lx\n", entry_pa);
+
+    /* figure out the mapping for the chain load routine */
+    loader_pa = vaddr_to_paddr((addr_t)&arm_chain_load);
+    if (loader_pa == 0) {
+        panic("error translating loader physical address\n");
+    }
+
+    /* add the low bits of the virtual address back */
+    loader_pa |= ((addr_t)&arm_chain_load & 0xfff);
+
+    paddr_t loader_pa_section = ROUNDDOWN(loader_pa, SECTION_SIZE);
+
+    LTRACEF("loader address %p, phys 0x%lx, surrounding large page 0x%lx\n",
+            &arm_chain_load, loader_pa, loader_pa_section);
+
+    vmm_aspace_t *myspace;
+    ret = vmm_create_aspace(&myspace, "bootload", 0);
+    if (ret != 0) {
+        panic("Could not create new aspace %d\n", ret);
+    }
+
+    /* attach the new aspace to the current thread; the sleep forces a reschedule so it becomes active */
+    get_current_thread()->aspace = myspace;
+    thread_sleep(1);
+
+    /* using large pages, map around the target location */
+    if ((ret = arch_mmu_map(&myspace->arch_aspace, loader_pa_section,
+                            loader_pa_section, (2 * SECTION_SIZE / PAGE_SIZE), 0)) != 0) {
+        panic("Could not map loader into new space %d\n", ret);
+    }
+#else
+    /* for the non-vm case, just branch directly into it */
+    entry_pa = (paddr_t)entry;
+    loader_pa = (paddr_t)&arm_chain_load;
+#endif
+
+    LTRACEF("disabling instruction/data cache\n");
+    arch_disable_cache(UCACHE);
+
+    LTRACEF("branching to physical address of loader, (va --> pa) (%p --> 0x%lx)\n",
+            &arm_chain_load, loader_pa);
+
+    void (*loader)(paddr_t entry, ulong, ulong, ulong, ulong) __NO_RETURN = (void *)loader_pa;
+    loader(entry_pa, arg0, arg1, arg2, arg3);
 }
 
 /* switch to user mode, set the user stack pointer to user_stack_top, put the svc stack pointer to the top of the kernel stack */
diff --git a/arch/arm64/asm.S b/arch/arm64/asm.S
index 397aba56b..13606a822 100644
--- a/arch/arm64/asm.S
+++ b/arch/arm64/asm.S
@@ -95,7 +95,7 @@ FUNCTION(arm64_elX_to_el1)
     cmp x4, #(0b01 << 2)
     bne .notEL1
     /* Already in EL1 */
-    ret
+    ret
 
 .notEL1:
     cmp x4, #(0b10 << 2)
@@ -146,3 +146,25 @@ FUNCTION(arm64_elX_to_el1)
 
 .Ltarget:
     ret
+
+/* void arm_chain_load(paddr_t entry, ulong arg0, ulong arg1, ulong arg2, ulong arg3) __NO_RETURN; */
+/* shut down the system, branching into the secondary system */
+FUNCTION(arm_chain_load)
+    /* shuffle the args around */
+    mov x5, x0
+    mov x0, x1
+    mov x1, x2
+    mov x2, x3
+    mov x3, x4
+
+    /* turn off the MMU */
+    /* LK runs in EL1, so SCTLR_EL1 is the control register to modify */
+    /* (the caches were already cleaned and disabled by arch_chain_load()) */
+    mrs x6, sctlr_el1
+    bic x6, x6, #(1 << 0)           // clear the M bit (MMU enable)
+    msr sctlr_el1, x6
+    isb
+    br x5
+    b .                             // should never reach here
+
+
diff --git a/arch/arm64/cache-ops.S b/arch/arm64/cache-ops.S
index f5fa49d1d..ed625dd7e 100644
--- a/arch/arm64/cache-ops.S
+++ b/arch/arm64/cache-ops.S
@@ -24,6 +24,7 @@
 #include
 #include
 #include
+#include
 
 .text
 
@@ -58,3 +59,72 @@ FUNCTION(arch_sync_cache_range)
     cache_range_op dc cvau         // clean dcache to PoU by MVA
     cache_range_op ic ivau         // invalidate icache to PoU by MVA
     ret
+
+/* void arch_disable_cache(uint flags); */
+FUNCTION(arch_disable_cache)
+    // LK runs in EL1
+
+    /* disable the iCache and dCache */
+.inEL1:
+    mrs x4, sctlr_el1
+    bic x4, x4, #(1 << 12)          // disable iCache
+    bic x4, x4, #(1 << 2)           // disable dCache
+    msr sctlr_el1, x4
+    isb
+
+    /* clean and invalidate the dCache */
+    mrs x0, CLIDR_EL1
+    and w3, w0, #0x07000000         // bits 26:24 = Level of Coherence
+    lsr w3, w3, #23                 // w3 = 2 x LoC
+    cbz w3, Finished                // if 0, we are done
+    mov w10, #0                     // w10 = 2 x cache level (csselr level field starts at bit 1)
+    mov w8, #1
+
+Loop1:
+    add w2, w10, w10, lsr #1        // w2 = 3 x cache level (w10 + w10/2, since w10 = 2 x level)
+    lsr w1, w0, w2                  // read ctype (cache type) for this level from clidr
+    and w1, w1, #0x7                // mask 3 bits
+    cmp w1, #2                      // types >= 2 include a data cache
+    b.lt Skip                       // skip if no data cache implemented at this level
+    msr csselr_el1, x10             // select the cache level
+    isb                             // sync before reading ccsidr
+    mrs x1, ccsidr_el1              // read ccsidr (current cache size id)
+    and w2, w1, #0x7                // w2 = log2(linesize) - 4
+    add w2, w2, #4                  // w2 = log2(linesize)
+    ubfx w4, w1, #3, #10            // w4 = max way number (associativity - 1)
+    clz w5, w4                      // w5 = 32 - log2(ways), bit position of the way in the dc operand
+    lsl w9, w4, w5                  // w9 = max way number, aligned to its position in the dc operand
+    lsl w16, w8, w5                 // w16 = amount to decrement the way number per iteration
+
+Loop2:
+    ubfx w7, w1, #13, #15           // w7 = max set number
+    lsl w7, w7, w2                  // w7 = max set number, aligned to its position in the dc operand
+    lsl w17, w8, w2                 // w17 = amount to decrement the set number per iteration
+
+Loop3:
+    orr w11, w10, w9                // w11 = combine the way number and cache level for the dc operand
+    orr w11, w11, w7                // ... and merge in the set number
+    dc cisw, x11                    // clean and invalidate by set and way
+    subs w7, w7, w17                // decrement the set number
+    b.ge Loop3
+    subs x9, x9, x16                // decrement the way number
+    b.ge Loop2
+
+Skip:
+    add w10, w10, #2                // next cache level
+    cmp w3, w10
+    dsb sy
+    b.gt Loop1
+
+Finished:
+    /* invalidate the iCache */
+    ic ialluis
+    isb
+
+    /* invalidate the TLB */
+    tlbi vmalle1
+    dsb sy
+    isb
+    ret
+
+
diff --git a/arch/arm64/include/arch/arm64.h b/arch/arm64/include/arch/arm64.h
index 9efa77c8f..d61c11319 100644
--- a/arch/arm64/include/arch/arm64.h
+++ b/arch/arm64/include/arch/arm64.h
@@ -49,6 +49,8 @@ __BEGIN_CDECLS
 
 void arm64_context_switch(vaddr_t *old_sp, vaddr_t new_sp);
 
+void arm_chain_load(paddr_t entry, ulong arg0, ulong arg1, ulong arg2, ulong arg3) __NO_RETURN;
+
 /* exception handling */
 struct arm64_iframe_long {
     uint64_t r[30];
diff --git a/platform/amlogic-s912d/platform.c b/platform/amlogic-s912d/platform.c
index cb0e9792a..669b65882 100644
--- a/platform/amlogic-s912d/platform.c
+++ b/platform/amlogic-s912d/platform.c
@@ -107,5 +107,9 @@ void platform_early_init(void)
 
     pmm_add_arena(&arena);
 
     // TODO: Reserve memory regions if needed
+    /* pull the range starting at the old kernel load offset out of the free
+       pool so the pmm will not hand those pages out */
+    struct list_node list = LIST_INITIAL_VALUE(list);
+    pmm_alloc_range(0x01080000, 0x01A80000 / PAGE_SIZE, &list);
 }
diff --git a/platform/amlogic-s912d/rules.mk b/platform/amlogic-s912d/rules.mk
index fd203919d..734524fee 100644
---
 a/platform/amlogic-s912d/rules.mk
+++ b/platform/amlogic-s912d/rules.mk
@@ -9,21 +9,22 @@ MODULE_DEPS += \
     dev/timer/arm_generic \
 
 MODULE_SRCS += \
-    $(LOCAL_DIR)/platform.c \
-    $(LOCAL_DIR)/uart.c \
+    $(LOCAL_DIR)/platform.c \
+    $(LOCAL_DIR)/uart.c \
 
 ARCH := arm64
 ARM_CPU := cortex-a53
 
 MEMBASE := 0
 MEMSIZE := 0x80000000 # 2GB
-KERNEL_LOAD_OFFSET := 0x01080000
+KERNEL_LOAD_OFFSET := 0x01A80000
+
 LINKER_SCRIPT += \
-    $(BUILDDIR)/system-onesegment.ld
+    $(BUILDDIR)/system-onesegment.ld
 
 MODULE := $(LOCAL_DIR)
 
 MODULE_DEPS += \
-    app/shell \
+    app/shell \
 
 WITH_CPP_SUPPORT=true
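
A minimal sketch of how the new chain-load path might be driven, for example from a debug or shell command. It assumes the next-stage image has already been copied into the window reserved above at physical 0x01080000 (the old KERNEL_LOAD_OFFSET) and that the image takes a device-tree address in x0, Linux arm64 style. The names boot_next_stage, next_image_pa and dtb_pa are illustrative only and do not appear in the patch.

    #include <arch.h>
    #include <kernel/vm.h>

    /* branch into an image staged at the old kernel load window */
    static void boot_next_stage(paddr_t next_image_pa, paddr_t dtb_pa)
    {
        /* arch_chain_load() takes a kernel virtual address and translates it
         * back to a physical address itself before turning the MMU off */
        void *entry = paddr_to_kvaddr(next_image_pa);

        /* arg0..arg3 end up in x0..x3 of the next image; a Linux-style
         * arm64 kernel expects the DTB physical address in x0 */
        arch_chain_load(entry, dtb_pa, 0, 0, 0);
        /* not reached */
    }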