From 2790af35f33b3961aa259564de0fc1993b7f3f9d Mon Sep 17 00:00:00 2001 From: Huaqi Fang <578567190@qq.com> Date: Wed, 28 Oct 2020 11:35:48 +0800 Subject: [PATCH 1/4] [arch][riscv][nuclei] Add Nuclei RISC-V processsor support * This PR add Nuclei RISC-V Processor which use CLIC interrupt system instead of PLIC and CLINT interrupt system * This PR is also using Nuclei NMSIS and SDK, see https://github.com/Nuclei-Software/NMSIS and https://github.com/Nuclei-Software/nuclei-sdk * In this PR, the context switch is done using CLIC software interrupt * For RISC-V CLIC spec, please check https://github.com/riscv/riscv-fast-interrupt/blob/master/clic.adoc * To evaluate this support, Nuclei HummingBird RISC-V SoC is supported, please check https://nucleisys.com/developboard.php for this board introduction Signed-off-by: Huaqi Fang <578567190@qq.com> --- arch/riscv/arch.c | 49 + arch/riscv/asm.S | 381 + arch/riscv/exceptions.c | 14 + arch/riscv/include/arch/arch_thread.h | 40 + arch/riscv/include/arch/riscv.h | 5 + arch/riscv/linker-twosegment.ld | 14 +- arch/riscv/rules.mk | 13 + arch/riscv/start.S | 4 +- arch/riscv/thread.c | 82 +- arch/riscv/time.c | 73 + arch/riscv/vectab.S | 52 + .../NMSIS/Core/Include/core_compatiable.h | 232 + .../NMSIS/Core/Include/core_feature_base.h | 1177 + .../NMSIS/Core/Include/core_feature_cache.h | 124 + .../NMSIS/Core/Include/core_feature_dsp.h | 18659 ++++++++++++++++ .../NMSIS/Core/Include/core_feature_eclic.h | 897 + .../NMSIS/Core/Include/core_feature_fpu.h | 304 + .../NMSIS/Core/Include/core_feature_pmp.h | 260 + .../NMSIS/Core/Include/core_feature_timer.h | 364 + .../NMSIS/Core/Include/nmsis_compiler.h | 37 + .../nuclei/NMSIS/Core/Include/nmsis_core.h | 87 + .../nuclei/NMSIS/Core/Include/nmsis_gcc.h | 265 + .../nuclei/NMSIS/Core/Include/nmsis_version.h | 87 + .../nuclei/NMSIS/Core/Include/riscv_bits.h | 92 + .../NMSIS/Core/Include/riscv_encoding.h | 617 + external/arch/riscv/nuclei/NMSIS/rules.mk | 3 + external/platform/hbird/NMSIS/hbird.h | 459 + external/platform/hbird/NMSIS/rules.mk | 4 + external/platform/hbird/NMSIS/system_hbird.h | 79 + external/platform/hbird/inc/hbird_gpio.h | 56 + external/platform/hbird/inc/hbird_uart.h | 75 + external/platform/hbird/inc/nuclei_sdk_soc.h | 17 + external/platform/hbird/rules.mk | 15 + external/platform/hbird/src/hbird_common.c | 69 + external/platform/hbird/src/hbird_gpio.c | 180 + external/platform/hbird/src/hbird_uart.c | 105 + external/platform/hbird/src/system_hbird.c | 402 + platform/nuclei-hbird/platform.c | 36 + platform/nuclei-hbird/platform_p.h | 16 + platform/nuclei-hbird/rules.mk | 25 + platform/nuclei-hbird/uart.c | 93 + platform/nuclei-hbird/vectab.c | 94 + project/nuclei-hbird.mk | 8 + project/target/nuclei-hbird.mk | 2 + .../nuclei-hbird/include/board_hbird_eval.h | 37 + target/nuclei-hbird/include/nuclei_sdk_hal.h | 20 + .../include/platform/nuclei-hbird.h | 28 + target/nuclei-hbird/openocd_hbird.cfg | 50 + target/nuclei-hbird/rules.mk | 34 + target/nuclei-hbird/target.c | 24 + 50 files changed, 25845 insertions(+), 15 deletions(-) create mode 100644 arch/riscv/vectab.S create mode 100644 external/arch/riscv/nuclei/NMSIS/Core/Include/core_compatiable.h create mode 100644 external/arch/riscv/nuclei/NMSIS/Core/Include/core_feature_base.h create mode 100644 external/arch/riscv/nuclei/NMSIS/Core/Include/core_feature_cache.h create mode 100644 external/arch/riscv/nuclei/NMSIS/Core/Include/core_feature_dsp.h create mode 100644 external/arch/riscv/nuclei/NMSIS/Core/Include/core_feature_eclic.h create mode 100644 external/arch/riscv/nuclei/NMSIS/Core/Include/core_feature_fpu.h create mode 100644 external/arch/riscv/nuclei/NMSIS/Core/Include/core_feature_pmp.h create mode 100644 external/arch/riscv/nuclei/NMSIS/Core/Include/core_feature_timer.h create mode 100644 external/arch/riscv/nuclei/NMSIS/Core/Include/nmsis_compiler.h create mode 100644 external/arch/riscv/nuclei/NMSIS/Core/Include/nmsis_core.h create mode 100644 external/arch/riscv/nuclei/NMSIS/Core/Include/nmsis_gcc.h create mode 100644 external/arch/riscv/nuclei/NMSIS/Core/Include/nmsis_version.h create mode 100644 external/arch/riscv/nuclei/NMSIS/Core/Include/riscv_bits.h create mode 100644 external/arch/riscv/nuclei/NMSIS/Core/Include/riscv_encoding.h create mode 100644 external/arch/riscv/nuclei/NMSIS/rules.mk create mode 100644 external/platform/hbird/NMSIS/hbird.h create mode 100644 external/platform/hbird/NMSIS/rules.mk create mode 100644 external/platform/hbird/NMSIS/system_hbird.h create mode 100644 external/platform/hbird/inc/hbird_gpio.h create mode 100644 external/platform/hbird/inc/hbird_uart.h create mode 100644 external/platform/hbird/inc/nuclei_sdk_soc.h create mode 100644 external/platform/hbird/rules.mk create mode 100644 external/platform/hbird/src/hbird_common.c create mode 100644 external/platform/hbird/src/hbird_gpio.c create mode 100644 external/platform/hbird/src/hbird_uart.c create mode 100644 external/platform/hbird/src/system_hbird.c create mode 100644 platform/nuclei-hbird/platform.c create mode 100644 platform/nuclei-hbird/platform_p.h create mode 100644 platform/nuclei-hbird/rules.mk create mode 100644 platform/nuclei-hbird/uart.c create mode 100644 platform/nuclei-hbird/vectab.c create mode 100644 project/nuclei-hbird.mk create mode 100644 project/target/nuclei-hbird.mk create mode 100644 target/nuclei-hbird/include/board_hbird_eval.h create mode 100644 target/nuclei-hbird/include/nuclei_sdk_hal.h create mode 100644 target/nuclei-hbird/include/platform/nuclei-hbird.h create mode 100644 target/nuclei-hbird/openocd_hbird.cfg create mode 100644 target/nuclei-hbird/rules.mk create mode 100644 target/nuclei-hbird/target.c diff --git a/arch/riscv/arch.c b/arch/riscv/arch.c index fc1e4773e..913ff2ec1 100644 --- a/arch/riscv/arch.c +++ b/arch/riscv/arch.c @@ -18,6 +18,11 @@ #include "riscv_priv.h" +#ifdef RISCV_VARIANT_NUCLEI +#include +volatile unsigned long riscv_reschedule = 0; +#endif + #define LOCAL_TRACE 0 // per cpu structure, pointed to by xscratch @@ -35,6 +40,36 @@ void riscv_configure_percpu_early(uint hart_id) { // first C level code to initialize each cpu void riscv_early_init_percpu(void) { +#ifdef RISCV_VARIANT_NUCLEI + extern void *vectab; + extern void exc_entry(void); + extern void irq_entry(void); + extern void _premain_init(void); + extern void platform_init_timer(void); + extern unsigned long default_stack_top; + unsigned long entry_tmp = 0; + + // set nmi exception to mtvec + riscv_csr_set(CSR_MMISC_CTL, MMISC_CTL_NMI_CAUSE_FFF); + // set clic vector base + riscv_csr_write(CSR_MTVT, (uintptr_t)&vectab); + entry_tmp = ((unsigned long)irq_entry) | 0x1; + // set clic non-vector irq entry + riscv_csr_write(CSR_MTVT2, (uintptr_t)entry_tmp); + entry_tmp = (((unsigned long)exc_entry) & (~(0x3FUL))) | 0x3; + // set exception entry and enable clic mode + riscv_csr_write(CSR_MTVEC, (uintptr_t)entry_tmp); + // enable cycle and instret counter + riscv_csr_set(mcounteren, 0x5); + // set csr mscratch for interrupt stack usage + + _premain_init(); + + platform_init_timer(); + // mask all exceptions, just in case + riscv_csr_clear(RISCV_CSR_XSTATUS, RISCV_CSR_XSTATUS_IE); + riscv_csr_write(CSR_MSCRATCH, &default_stack_top); +#else // set the top level exception handler riscv_csr_write(RISCV_CSR_XTVEC, (uintptr_t)&riscv_exception_entry); @@ -44,6 +79,7 @@ void riscv_early_init_percpu(void) { // enable cycle counter (disabled for now, unimplemented on sifive-e) //riscv_csr_set(mcounteren, 1); +#endif } // called very early just after entering C code on boot processor @@ -62,6 +98,18 @@ void riscv_init_percpu(void) { riscv_csr_set(RISCV_CSR_XIE, RISCV_CSR_XIE_EIE); } +#ifdef RISCV_VARIANT_NUCLEI +void riscv_clic_irq_entry(void) { + THREAD_STATS_INC(interrupts); +} + +void riscv_clic_irq_exit(bool reschedule) { + if (reschedule != INT_NO_RESCHEDULE) { + riscv_reschedule = reschedule; + } +} +#endif + // called later once the kernel is running before platform and target init void arch_init(void) { riscv_init_percpu(); @@ -95,6 +143,7 @@ void arch_init(void) { void arch_idle(void) { // let the platform/target disable wfi #if !RISCV_DISABLE_WFI + printf("Idle\n"); __asm__ volatile("wfi"); #endif } diff --git a/arch/riscv/asm.S b/arch/riscv/asm.S index f5f201d51..54cfa83b7 100644 --- a/arch/riscv/asm.S +++ b/arch/riscv/asm.S @@ -5,6 +5,7 @@ * license that can be found in the LICENSE file or at * https://opensource.org/licenses/MIT */ +#ifndef RISCV_VARIANT_NUCLEI #include #include #include @@ -104,3 +105,383 @@ FUNCTION(riscv_exception_entry) RISCV_XRET END_FUNCTION(riscv_exception_entry) +#endif + +#ifdef RISCV_VARIANT_NUCLEI +#include + +#ifndef __riscv_32e +#define portRegNum 30 +#else +#define portRegNum 14 +#endif + +#define portCONTEXT_SIZE ( portRegNum * REGBYTES ) + + .extern rt_interrupt_from_thread + .extern rt_interrupt_to_thread + +.align 8 + +/** + * \brief Global interrupt disabled + * \details + * This function disable global interrupt. + * \remarks + * - All the interrupt requests will be ignored by CPU. + */ +.macro DISABLE_MIE + csrc CSR_MSTATUS, MSTATUS_MIE +.endm + +/** + * \brief Macro for context save + * \details + * This macro save ABI defined caller saved registers in the stack. + * \remarks + * - This Macro could use to save context when you enter to interrupt + * or exception +*/ +/* Save caller registers */ +.macro SAVE_CONTEXT + csrrw sp, CSR_MSCRATCHCSWL, sp + /* Allocate stack space for context saving */ +#ifndef __riscv_32e + addi sp, sp, -20*REGBYTES +#else + addi sp, sp, -14*REGBYTES +#endif /* __riscv_32e */ + + STORE x1, 0*REGBYTES(sp) + /* STORE x4, 1*REGBYTES(sp) */ + STORE x5, 2*REGBYTES(sp) + STORE x6, 3*REGBYTES(sp) + STORE x7, 4*REGBYTES(sp) + STORE x10, 5*REGBYTES(sp) + STORE x11, 6*REGBYTES(sp) + STORE x12, 7*REGBYTES(sp) + STORE x13, 8*REGBYTES(sp) + STORE x14, 9*REGBYTES(sp) + STORE x15, 10*REGBYTES(sp) +#ifndef __riscv_32e + STORE x16, 14*REGBYTES(sp) + STORE x17, 15*REGBYTES(sp) + STORE x28, 16*REGBYTES(sp) + STORE x29, 17*REGBYTES(sp) + STORE x30, 18*REGBYTES(sp) + STORE x31, 19*REGBYTES(sp) +#endif /* __riscv_32e */ +.endm + +/** + * \brief Macro for restore caller registers + * \details + * This macro restore ABI defined caller saved registers from stack. + * \remarks + * - You could use this macro to restore context before you want return + * from interrupt or exeception + */ +/* Restore caller registers */ +.macro RESTORE_CONTEXT + LOAD x1, 0*REGBYTES(sp) + /* LOAD x4, 1*REGBYTES(sp) */ + LOAD x5, 2*REGBYTES(sp) + LOAD x6, 3*REGBYTES(sp) + LOAD x7, 4*REGBYTES(sp) + LOAD x10, 5*REGBYTES(sp) + LOAD x11, 6*REGBYTES(sp) + LOAD x12, 7*REGBYTES(sp) + LOAD x13, 8*REGBYTES(sp) + LOAD x14, 9*REGBYTES(sp) + LOAD x15, 10*REGBYTES(sp) +#ifndef __riscv_32e + LOAD x16, 14*REGBYTES(sp) + LOAD x17, 15*REGBYTES(sp) + LOAD x28, 16*REGBYTES(sp) + LOAD x29, 17*REGBYTES(sp) + LOAD x30, 18*REGBYTES(sp) + LOAD x31, 19*REGBYTES(sp) + + /* De-allocate the stack space */ + addi sp, sp, 20*REGBYTES +#else + /* De-allocate the stack space */ + addi sp, sp, 14*REGBYTES +#endif /* __riscv_32e */ + csrrw sp, CSR_MSCRATCHCSWL, sp +.endm + +/** + * \brief Macro for save necessary CSRs to stack + * \details + * This macro store MCAUSE, MEPC, MSUBM to stack. + */ +.macro SAVE_CSR_CONTEXT + /* Store CSR mcause to stack using pushmcause */ + csrrwi x0, CSR_PUSHMCAUSE, 11 + /* Store CSR mepc to stack using pushmepc */ + csrrwi x0, CSR_PUSHMEPC, 12 + /* Store CSR msub to stack using pushmsub */ + csrrwi x0, CSR_PUSHMSUBM, 13 +.endm + +/** + * \brief Macro for restore necessary CSRs from stack + * \details + * This macro restore MSUBM, MEPC, MCAUSE from stack. + */ +.macro RESTORE_CSR_CONTEXT + LOAD x5, 13*REGBYTES(sp) + csrw CSR_MSUBM, x5 + LOAD x5, 12*REGBYTES(sp) + csrw CSR_MEPC, x5 + LOAD x5, 11*REGBYTES(sp) + csrw CSR_MCAUSE, x5 +.endm + +/** + * \brief Exception/NMI Entry + * \details + * This function provide common entry functions for exception/nmi. + * \remarks + * This function provide a default exception/nmi entry. + * ABI defined caller save register and some CSR registers + * to be saved before enter interrupt handler and be restored before return. + */ +.section .text.trap +/* In CLIC mode, the exeception entry must be 64bytes aligned */ +.align 6 +.global exc_entry +exc_entry: + /* Save the caller saving registers (context) */ + SAVE_CONTEXT + /* Save the necessary CSR registers */ + SAVE_CSR_CONTEXT + + /* + * Set the exception handler function arguments + * argument 1: mcause value + * argument 2: current stack point(SP) value + */ + csrr a0, mcause + mv a1, sp + /* + * TODO: Call the exception handler function + * By default, the function template is provided in + * system_Device.c, you can adjust it as you want + */ + call core_exception_handler + + /* Restore the necessary CSR registers */ + RESTORE_CSR_CONTEXT + /* Restore the caller saving registers (context) */ + RESTORE_CONTEXT + + /* Return to regular code */ + mret + +/** + * \brief Non-Vector Interrupt Entry + * \details + * This function provide common entry functions for handling + * non-vector interrupts + * \remarks + * This function provide a default non-vector interrupt entry. + * ABI defined caller save register and some CSR registers need + * to be saved before enter interrupt handler and be restored before return. + */ +.section .text.irq +/* In CLIC mode, the interrupt entry must be 4bytes aligned */ +.align 2 +.global irq_entry +/* This label will be set to MTVT2 register */ +irq_entry: + /* Save the caller saving registers (context) */ + SAVE_CONTEXT + /* Save the necessary CSR registers */ + SAVE_CSR_CONTEXT + + /* This special CSR read/write operation, which is actually + * claim the CLIC to find its pending highest ID, if the ID + * is not 0, then automatically enable the mstatus.MIE, and + * jump to its vector-entry-label, and update the link register + */ + csrrw ra, CSR_JALMNXTI, ra + + /* Critical section with interrupts disabled */ + DISABLE_MIE + + jal riscv_irq_exit + + /* Restore the necessary CSR registers */ + RESTORE_CSR_CONTEXT + /* Restore the caller saving registers (context) */ + RESTORE_CONTEXT + + /* Return to regular code */ + mret + +/* Default Handler for Exceptions / Interrupts */ +.global default_intexc_handler +.weak default_intexc_handler +Undef_Handler: +default_intexc_handler: +1: + j 1b + + .global arch_context_start + +/* Start the first task. This also clears the bit that indicates the FPU is + in use in case the FPU was used before the scheduler was started - which + would otherwise result in the unnecessary leaving of space in the stack + for lazy saving of FPU registers. */ +.align 3 +arch_context_start: + /* Setup Interrupt Stack using + The stack that was used by main() + before the scheduler is started is + no longer required after the scheduler is started. + Interrupt stack pointer is stored in CSR_MSCRATCH */ + LOAD sp, 0x0(a0) /* Read sp from first TCB member(a0) */ + + /* Pop PC from stack and set MEPC */ + LOAD t0, 0 * REGBYTES(sp) + csrw CSR_MEPC, t0 + /* Pop mstatus from stack and set it */ + LOAD t0, (portRegNum - 1) * REGBYTES(sp) + csrw CSR_MSTATUS, t0 + /* Interrupt still disable here */ + /* Restore Registers from Stack */ + LOAD x1, 1 * REGBYTES(sp) /* RA */ + LOAD x5, 2 * REGBYTES(sp) + LOAD x6, 3 * REGBYTES(sp) + LOAD x7, 4 * REGBYTES(sp) + LOAD x8, 5 * REGBYTES(sp) + LOAD x9, 6 * REGBYTES(sp) + LOAD x10, 7 * REGBYTES(sp) + LOAD x11, 8 * REGBYTES(sp) + LOAD x12, 9 * REGBYTES(sp) + LOAD x13, 10 * REGBYTES(sp) + LOAD x14, 11 * REGBYTES(sp) + LOAD x15, 12 * REGBYTES(sp) +#ifndef __riscv_32e + LOAD x16, 13 * REGBYTES(sp) + LOAD x17, 14 * REGBYTES(sp) + LOAD x18, 15 * REGBYTES(sp) + LOAD x19, 16 * REGBYTES(sp) + LOAD x20, 17 * REGBYTES(sp) + LOAD x21, 18 * REGBYTES(sp) + LOAD x22, 19 * REGBYTES(sp) + LOAD x23, 20 * REGBYTES(sp) + LOAD x24, 21 * REGBYTES(sp) + LOAD x25, 22 * REGBYTES(sp) + LOAD x26, 23 * REGBYTES(sp) + LOAD x27, 24 * REGBYTES(sp) + LOAD x28, 25 * REGBYTES(sp) + LOAD x29, 26 * REGBYTES(sp) + LOAD x30, 27 * REGBYTES(sp) + LOAD x31, 28 * REGBYTES(sp) +#endif + + addi sp, sp, portCONTEXT_SIZE + + mret + +.align 2 +.global riscv_msip_handler +riscv_msip_handler: + addi sp, sp, -portCONTEXT_SIZE + STORE x1, 1 * REGBYTES(sp) /* RA */ + STORE x5, 2 * REGBYTES(sp) + STORE x6, 3 * REGBYTES(sp) + STORE x7, 4 * REGBYTES(sp) + STORE x8, 5 * REGBYTES(sp) + STORE x9, 6 * REGBYTES(sp) + STORE x10, 7 * REGBYTES(sp) + STORE x11, 8 * REGBYTES(sp) + STORE x12, 9 * REGBYTES(sp) + STORE x13, 10 * REGBYTES(sp) + STORE x14, 11 * REGBYTES(sp) + STORE x15, 12 * REGBYTES(sp) +#ifndef __riscv_32e + STORE x16, 13 * REGBYTES(sp) + STORE x17, 14 * REGBYTES(sp) + STORE x18, 15 * REGBYTES(sp) + STORE x19, 16 * REGBYTES(sp) + STORE x20, 17 * REGBYTES(sp) + STORE x21, 18 * REGBYTES(sp) + STORE x22, 19 * REGBYTES(sp) + STORE x23, 20 * REGBYTES(sp) + STORE x24, 21 * REGBYTES(sp) + STORE x25, 22 * REGBYTES(sp) + STORE x26, 23 * REGBYTES(sp) + STORE x27, 24 * REGBYTES(sp) + STORE x28, 25 * REGBYTES(sp) + STORE x29, 26 * REGBYTES(sp) + STORE x30, 27 * REGBYTES(sp) + STORE x31, 28 * REGBYTES(sp) +#endif + /* Push mstatus to stack */ + csrr t0, CSR_MSTATUS + STORE t0, (portRegNum - 1) * REGBYTES(sp) + + /* Push additional registers */ + + /* Store sp to task stack */ + LOAD t0, rt_interrupt_from_thread + STORE sp, 0(t0) + + csrr t0, CSR_MEPC + STORE t0, 0(sp) + + jal riscv_msip_process + + /* Switch task context */ + LOAD t0, rt_interrupt_to_thread + LOAD sp, 0x0(t0) + + /* Pop PC from stack and set MEPC */ + LOAD t0, 0 * REGBYTES(sp) + csrw CSR_MEPC, t0 + /* Pop additional registers */ + + /* Pop mstatus from stack and set it */ + LOAD t0, (portRegNum - 1) * REGBYTES(sp) + csrw CSR_MSTATUS, t0 + /* Interrupt still disable here */ + /* Restore Registers from Stack */ + LOAD x1, 1 * REGBYTES(sp) /* RA */ + LOAD x5, 2 * REGBYTES(sp) + LOAD x6, 3 * REGBYTES(sp) + LOAD x7, 4 * REGBYTES(sp) + LOAD x8, 5 * REGBYTES(sp) + LOAD x9, 6 * REGBYTES(sp) + LOAD x10, 7 * REGBYTES(sp) + LOAD x11, 8 * REGBYTES(sp) + LOAD x12, 9 * REGBYTES(sp) + LOAD x13, 10 * REGBYTES(sp) + LOAD x14, 11 * REGBYTES(sp) + LOAD x15, 12 * REGBYTES(sp) +#ifndef __riscv_32e + LOAD x16, 13 * REGBYTES(sp) + LOAD x17, 14 * REGBYTES(sp) + LOAD x18, 15 * REGBYTES(sp) + LOAD x19, 16 * REGBYTES(sp) + LOAD x20, 17 * REGBYTES(sp) + LOAD x21, 18 * REGBYTES(sp) + LOAD x22, 19 * REGBYTES(sp) + LOAD x23, 20 * REGBYTES(sp) + LOAD x24, 21 * REGBYTES(sp) + LOAD x25, 22 * REGBYTES(sp) + LOAD x26, 23 * REGBYTES(sp) + LOAD x27, 24 * REGBYTES(sp) + LOAD x28, 25 * REGBYTES(sp) + LOAD x29, 26 * REGBYTES(sp) + LOAD x30, 27 * REGBYTES(sp) + LOAD x31, 28 * REGBYTES(sp) +#endif + + addi sp, sp, portCONTEXT_SIZE + mret + +#endif \ No newline at end of file diff --git a/arch/riscv/exceptions.c b/arch/riscv/exceptions.c index f8fd67733..7e3d55a13 100644 --- a/arch/riscv/exceptions.c +++ b/arch/riscv/exceptions.c @@ -13,6 +13,7 @@ #define LOCAL_TRACE 0 +#ifndef RISCV_VARIANT_NUCLEI // keep in sync with asm.S struct riscv_short_iframe { ulong epc; @@ -118,3 +119,16 @@ void riscv_exception_handler(long cause, ulong epc, struct riscv_short_iframe *f thread_preempt(); } } +#else +extern volatile unsigned long riscv_reschedule; +extern volatile unsigned long rt_preemt_flag; +void riscv_irq_exit(void) +{ + if (riscv_reschedule != INT_NO_RESCHEDULE) { + riscv_reschedule = INT_NO_RESCHEDULE; + rt_preemt_flag = 1; + thread_preempt(); + rt_preemt_flag = 0; + } +} +#endif diff --git a/arch/riscv/include/arch/arch_thread.h b/arch/riscv/include/arch/arch_thread.h index 78291792f..6a445ac91 100644 --- a/arch/riscv/include/arch/arch_thread.h +++ b/arch/riscv/include/arch/arch_thread.h @@ -9,6 +9,45 @@ #include +#ifdef RISCV_VARIANT_NUCLEI +struct riscv_context_switch_frame { + unsigned long epc; /* epc - epc - program counter */ + unsigned long ra; /* x1 - ra - return address for jumps */ + unsigned long t0; /* x5 - t0 - temporary register 0 */ + unsigned long t1; /* x6 - t1 - temporary register 1 */ + unsigned long t2; /* x7 - t2 - temporary register 2 */ + unsigned long s0_fp; /* x8 - s0/fp - saved register 0 or frame pointer */ + unsigned long s1; /* x9 - s1 - saved register 1 */ + unsigned long a0; /* x10 - a0 - return value or function argument 0 */ + unsigned long a1; /* x11 - a1 - return value or function argument 1 */ + unsigned long a2; /* x12 - a2 - function argument 2 */ + unsigned long a3; /* x13 - a3 - function argument 3 */ + unsigned long a4; /* x14 - a4 - function argument 4 */ + unsigned long a5; /* x15 - a5 - function argument 5 */ +#ifndef __riscv_32e + unsigned long a6; /* x16 - a6 - function argument 6 */ + unsigned long a7; /* x17 - s7 - function argument 7 */ + unsigned long s2; /* x18 - s2 - saved register 2 */ + unsigned long s3; /* x19 - s3 - saved register 3 */ + unsigned long s4; /* x20 - s4 - saved register 4 */ + unsigned long s5; /* x21 - s5 - saved register 5 */ + unsigned long s6; /* x22 - s6 - saved register 6 */ + unsigned long s7; /* x23 - s7 - saved register 7 */ + unsigned long s8; /* x24 - s8 - saved register 8 */ + unsigned long s9; /* x25 - s9 - saved register 9 */ + unsigned long s10; /* x26 - s10 - saved register 10 */ + unsigned long s11; /* x27 - s11 - saved register 11 */ + unsigned long t3; /* x28 - t3 - temporary register 3 */ + unsigned long t4; /* x29 - t4 - temporary register 4 */ + unsigned long t5; /* x30 - t5 - temporary register 5 */ + unsigned long t6; /* x31 - t6 - temporary register 6 */ +#endif + unsigned long mstatus; /* - machine status register */ +}; +struct arch_thread { + struct riscv_context_switch_frame *cs_frame; +}; +#else struct riscv_context_switch_frame { unsigned long ra; // return address (x1) unsigned long sp; // stack pointer (x2) @@ -31,6 +70,7 @@ struct riscv_context_switch_frame { struct arch_thread { struct riscv_context_switch_frame cs_frame; }; +#endif void riscv_context_switch(struct riscv_context_switch_frame *oldcs, struct riscv_context_switch_frame *newcs); diff --git a/arch/riscv/include/arch/riscv.h b/arch/riscv/include/arch/riscv.h index 7bda2f20b..44dda4bbb 100644 --- a/arch/riscv/include/arch/riscv.h +++ b/arch/riscv/include/arch/riscv.h @@ -170,6 +170,11 @@ static inline uint riscv_current_hart(void) { void riscv_set_secondary_count(int count); +#ifdef RISCV_VARIANT_NUCLEI +void riscv_clic_irq_entry(void); +void riscv_clic_irq_exit(bool reschedule); +#endif /* RISCV_VARIANT_NUCLEI */ + void riscv_exception_entry(void); enum handler_return riscv_timer_exception(void); diff --git a/arch/riscv/linker-twosegment.ld b/arch/riscv/linker-twosegment.ld index b2eb2ac39..b81033243 100644 --- a/arch/riscv/linker-twosegment.ld +++ b/arch/riscv/linker-twosegment.ld @@ -18,12 +18,13 @@ SECTIONS { . = %ROMBASE%; - _start = .; __rom_start = .; /* text/read-only data */ /* set the load address to physical MEMBASE */ .text : { + KEEP(*(.text.boot.vectab1)) + KEEP(*(.text.boot.vectab2)) KEEP(*(.text.boot)) *(.text .text*) *(.gnu.linkonce.t.*) @@ -61,17 +62,14 @@ SECTIONS __dtor_end = .; *(.got*) *(.dynamic) - } :data - - /* Try to put sdata and sbss near each other by putting sdata at the end of the data segment - * and sbss at the start of the bss segment. This maximizes reach of things referenced off of - * the global pointer. */ - .sdata : { + . = ALIGN(8); __global_pointer$ = . + (4K / 2); /* Question: should we put srodata here on multi seg binaries? */ *(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata .srodata.*) *(.sdata .sdata.* .gnu.linkonce.s.*) - } + } :data + + . = ALIGN(%BITS% / 8); __data_end = .; diff --git a/arch/riscv/rules.mk b/arch/riscv/rules.mk index 809f348d8..3f93eebde 100644 --- a/arch/riscv/rules.mk +++ b/arch/riscv/rules.mk @@ -30,8 +30,20 @@ endif SUBARCH ?= 32 +# Different vendor variant of riscv +VARIANT ?= + RISCV_MODE ?= machine +ifeq ($(VARIANT),nuclei) +MODULE_DEPS += \ + arch/riscv/nuclei/NMSIS + +MODULE_SRCS += $(LOCAL_DIR)/vectab.S + +GLOBAL_DEFINES += RISCV_VARIANT_NUCLEI=1 +endif + ifeq ($(strip $(RISCV_MODE)),machine) $(info RISCV: Machine Mode) GLOBAL_DEFINES += RISCV_M_MODE=1 @@ -154,6 +166,7 @@ WITH_LINKER_GC ?= 0 endif LIBGCC := $(shell $(TOOLCHAIN_PREFIX)gcc $(GLOBAL_COMPILEFLAGS) $(ARCH_COMPILEFLAGS) $(GLOBAL_CFLAGS) -print-libgcc-file-name) +LIBGCC += $(shell $(TOOLCHAIN_PREFIX)gcc $(GLOBAL_COMPILEFLAGS) $(ARCH_COMPILEFLAGS) $(GLOBAL_CFLAGS) -print-file-name=libstdc++.a) $(info LIBGCC = $(LIBGCC)) # potentially generated files that should be cleaned out with clean make rule diff --git a/arch/riscv/start.S b/arch/riscv/start.S index 7ffe777b0..3416775e9 100644 --- a/arch/riscv/start.S +++ b/arch/riscv/start.S @@ -218,9 +218,9 @@ END_FUNCTION(_mmu_init) .bss .align 4 -LOCAL_DATA(default_stack) +DATA(default_stack) .skip ARCH_DEFAULT_STACK_SIZE * RISCV_MAX_HARTS; -LOCAL_DATA(default_stack_top) +DATA(default_stack_top) // put boot status in .data so it doesn't get paved over during BSS initialization .data diff --git a/arch/riscv/thread.c b/arch/riscv/thread.c index 15715c9cc..2014237ed 100644 --- a/arch/riscv/thread.c +++ b/arch/riscv/thread.c @@ -5,6 +5,9 @@ * license that can be found in the LICENSE file or at * https://opensource.org/licenses/MIT */ +#ifdef RISCV_VARIANT_NUCLEI +#include +#endif #include #include #include @@ -16,6 +19,19 @@ #define LOCAL_TRACE 0 +#ifdef RISCV_VARIANT_NUCLEI +#define portINITIAL_MSTATUS ( MSTATUS_MPP | MSTATUS_FS_INITIAL) +extern void arch_context_start(unsigned long sp); +static void riscv_idle_thread(void); +static void riscv_trigger_preempt(void); +volatile unsigned long rt_interrupt_from_thread = 0; +volatile unsigned long rt_interrupt_to_thread = 0; +volatile unsigned long rt_realswitch_flag = 0; +volatile unsigned long rt_preemt_flag = 0; + +static uint8_t initial_stack[1024] __SECTION(".bss.prebss.initial_stack") __ALIGNED(8); +#endif + struct thread *_current_thread; static void initial_thread_func(void) __NO_RETURN; @@ -32,7 +48,7 @@ static void initial_thread_func(void) { /* release the thread lock that was implicitly held across the reschedule */ spin_unlock(&thread_lock); arch_enable_ints(); - + int ret = ct->entry(ct->arg); LTRACEF("thread %p exiting with %d\n", ct, ret); @@ -42,14 +58,22 @@ static void initial_thread_func(void) { void arch_thread_initialize(thread_t *t) { /* zero out the thread context */ - memset(&t->arch.cs_frame, 0, sizeof(t->arch.cs_frame)); /* make sure the top of the stack is 16 byte aligned */ vaddr_t stack_top = ROUNDDOWN((vaddr_t)t->stack + t->stack_size, 16); - +#ifndef RISCV_VARIANT_NUCLEI + memset(&t->arch.cs_frame, 0, sizeof(t->arch.cs_frame)); t->arch.cs_frame.sp = stack_top; t->arch.cs_frame.ra = (vaddr_t)&initial_thread_func; - +#else + extern void arch_idle(void); + stack_top -= sizeof(struct riscv_context_switch_frame); + t->arch.cs_frame = (struct riscv_context_switch_frame *)stack_top; + t->arch.cs_frame->ra = (unsigned long)&arch_idle; + t->arch.cs_frame->a0 = (unsigned long)t->arg; + t->arch.cs_frame->epc = (unsigned long)&initial_thread_func; + t->arch.cs_frame->mstatus = (unsigned long)portINITIAL_MSTATUS; +#endif LTRACEF("t %p (%s) stack top %#lx entry %p arg %p\n", t, t->name, stack_top, t->entry, t->arg); } @@ -57,14 +81,62 @@ void arch_context_switch(thread_t *oldthread, thread_t *newthread) { DEBUG_ASSERT(arch_ints_disabled()); LTRACEF("old %p (%s), new %p (%s)\n", oldthread, oldthread->name, newthread, newthread->name); - +#ifdef RISCV_VARIANT_NUCLEI + LTRACEF("old %s (sp %p), new %s (sp %p)\n", oldthread->name, oldthread->arch.cs_frame, newthread->name, newthread->arch.cs_frame); + if (oldthread->stack_size > 0) { + if (newthread->stack_size == 0) { + newthread->stack = initial_stack; + newthread->stack_size = sizeof(initial_stack); + newthread->entry= riscv_idle_thread; + arch_thread_initialize(newthread); + } + if (rt_realswitch_flag == 0) { + rt_interrupt_from_thread = &(oldthread->arch.cs_frame); + } + rt_realswitch_flag = 1; + rt_interrupt_to_thread = &(newthread->arch.cs_frame); + LTRACEF("int %d, from pc %p, to pc %p\n", rt_realswitch_flag, oldthread->arch.cs_frame->epc, newthread->arch.cs_frame->epc); + riscv_trigger_preempt(); + if (rt_preemt_flag == 0) { + spin_unlock(&thread_lock); + arch_enable_ints(); + } + } else { // First task started + arch_context_start((unsigned long)&(newthread->arch.cs_frame)); + // never return + } + +#else riscv_context_switch(&oldthread->arch.cs_frame, &newthread->arch.cs_frame); +#endif } void arch_dump_thread(thread_t *t) { if (t->state != THREAD_RUNNING) { dprintf(INFO, "\tarch: "); +#ifdef RISCV_VARIANT_NUCLEI + dprintf(INFO, "sp %#lx\n", t->arch.cs_frame); +#else dprintf(INFO, "sp %#lx\n", t->arch.cs_frame.sp); +#endif } } +#ifdef RISCV_VARIANT_NUCLEI +void riscv_msip_process(void) { + /* Clear Software IRQ, A MUST */ + SysTimer_ClearSWIRQ(); + /* Clear the real switch flag */ + rt_realswitch_flag = 0; +} + +static void riscv_trigger_preempt(void) { + /* Set a software interrupt(SWI) request to request a context switch. */ + SysTimer_SetSWIRQ(); + __RWMB(); +} +static void riscv_idle_thread(void) { + for (;;) + arch_idle(); +} +#endif \ No newline at end of file diff --git a/arch/riscv/time.c b/arch/riscv/time.c index 2e78b93b5..f1b0adf4a 100644 --- a/arch/riscv/time.c +++ b/arch/riscv/time.c @@ -5,6 +5,9 @@ * license that can be found in the LICENSE file or at * https://opensource.org/licenses/MIT */ +#ifdef RISCV_VARIANT_NUCLEI +#include +#endif #include #include #include @@ -22,6 +25,75 @@ static platform_timer_callback timer_cb; static void *timer_arg; +#ifdef RISCV_VARIANT_NUCLEI +#define configKERNEL_INTERRUPT_PRIORITY 0 +status_t platform_set_oneshot_timer (platform_timer_callback callback, void *arg, lk_time_t interval) { + LTRACEF("cb %p, arg %p, interval %u\n", callback, arg, interval); + + // disable timer irq + ECLIC_DisableIRQ(SysTimer_IRQn); + + timer_cb = callback; + timer_arg = arg; + + // enable the timer irq + ECLIC_EnableIRQ(SysTimer_IRQn); + + // convert interval to ticks + + uint64_t ticks = ((interval * ARCH_RISCV_MTIME_RATE) / 1000u); + SysTick_Reload(ticks); + + return NO_ERROR; +} + +lk_bigtime_t current_time_hires(void) { +#if ARCH_RISCV_MTIME_RATE < 10000000 + return current_time() * 1000llu; // hack to deal with slow clocks +#else + return SysTimer_GetLoadValue() / (ARCH_RISCV_MTIME_RATE / 1000000u); +#endif +} + +lk_time_t current_time(void) { + return SysTimer_GetLoadValue() / (ARCH_RISCV_MTIME_RATE / 1000u); +} + +void platform_stop_timer(void) { + ECLIC_DisableIRQ(SysTimer_IRQn); +} + +void platform_init_timer(void) +{ + ECLIC_DisableIRQ(SysTimer_IRQn); + ECLIC_SetLevelIRQ(SysTimer_IRQn, configKERNEL_INTERRUPT_PRIORITY); + ECLIC_SetShvIRQ(SysTimer_IRQn, ECLIC_NON_VECTOR_INTERRUPT); + + /* Set SWI interrupt level to lowest level/priority, SysTimerSW as Vector Interrupt */ + ECLIC_SetShvIRQ(SysTimerSW_IRQn, ECLIC_VECTOR_INTERRUPT); + ECLIC_SetLevelIRQ(SysTimerSW_IRQn, configKERNEL_INTERRUPT_PRIORITY); + SysTimer_ClearSWIRQ(); + ECLIC_EnableIRQ(SysTimerSW_IRQn); +} + +enum handler_return riscv_mtip_handler(void) { + LTRACEF("tick\n"); + + ECLIC_DisableIRQ(SysTimer_IRQn); + + enum handler_return ret = INT_NO_RESCHEDULE; + unsigned long state; + state = riscv_csr_read_clear(RISCV_CSR_XSTATUS, RISCV_CSR_XSTATUS_IE) & RISCV_CSR_XSTATUS_IE; + if (timer_cb) { + ret = timer_cb(timer_arg, current_time()); + } + riscv_clic_irq_exit(ret); + riscv_csr_set(RISCV_CSR_XSTATUS, state); + + return ret; +} + +#else status_t platform_set_oneshot_timer (platform_timer_callback callback, void *arg, lk_time_t interval) { LTRACEF("cb %p, arg %p, interval %u\n", callback, arg, interval); @@ -75,3 +147,4 @@ enum handler_return riscv_timer_exception(void) { return ret; } +#endif \ No newline at end of file diff --git a/arch/riscv/vectab.S b/arch/riscv/vectab.S new file mode 100644 index 000000000..b5cf8f3a7 --- /dev/null +++ b/arch/riscv/vectab.S @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2020 Nuclei System Technology + * + * Use of this source code is governed by a MIT-style + * license that can be found in the LICENSE file or at + * https://opensource.org/licenses/MIT + */ +#ifdef RISCV_VARIANT_NUCLEI + +#include "riscv_encoding.h" + +.macro DECLARE_INT_HANDLER INT_HDL_NAME +#if defined(__riscv_xlen) && (__riscv_xlen == 32) + .word \INT_HDL_NAME +#else + .dword \INT_HDL_NAME +#endif +.endm + + .section .text.boot.vectab1 + + .weak riscv_msip_handler + .weak riscv_mtip_handler + + .global vectab +vectab: + j _start /* 0: Reserved, Jump to _start when reset for ILM/FlashXIP mode.*/ + .align LOG_REGBYTES /* Need to align 4 byte for RV32, 8 Byte for RV64 */ + DECLARE_INT_HANDLER default_intexc_handler /* 1: Reserved */ + DECLARE_INT_HANDLER default_intexc_handler /* 2: Reserved */ + DECLARE_INT_HANDLER riscv_msip_handler /* 3: Machine software interrupt */ + + DECLARE_INT_HANDLER default_intexc_handler /* 4: Reserved */ + DECLARE_INT_HANDLER default_intexc_handler /* 5: Reserved */ + DECLARE_INT_HANDLER default_intexc_handler /* 6: Reserved */ + DECLARE_INT_HANDLER riscv_mtip_handler /* 7: Machine timer interrupt */ + + DECLARE_INT_HANDLER default_intexc_handler /* 8: Reserved */ + DECLARE_INT_HANDLER default_intexc_handler /* 9: Reserved */ + DECLARE_INT_HANDLER default_intexc_handler /* 10: Reserved */ + DECLARE_INT_HANDLER default_intexc_handler /* 11: Reserved */ + + DECLARE_INT_HANDLER default_intexc_handler /* 12: Reserved */ + DECLARE_INT_HANDLER default_intexc_handler /* 13: Reserved */ + DECLARE_INT_HANDLER default_intexc_handler /* 14: Reserved */ + DECLARE_INT_HANDLER default_intexc_handler /* 15: Reserved */ + + DECLARE_INT_HANDLER default_intexc_handler /* 16: Reserved */ + DECLARE_INT_HANDLER default_intexc_handler /* 17: Reserved */ + DECLARE_INT_HANDLER default_intexc_handler /* 18: Reserved */ + +#endif \ No newline at end of file diff --git a/external/arch/riscv/nuclei/NMSIS/Core/Include/core_compatiable.h b/external/arch/riscv/nuclei/NMSIS/Core/Include/core_compatiable.h new file mode 100644 index 000000000..40a9198e1 --- /dev/null +++ b/external/arch/riscv/nuclei/NMSIS/Core/Include/core_compatiable.h @@ -0,0 +1,232 @@ +/* + * Copyright (c) 2019 Nuclei Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __CORE_COMPATIABLE_H__ +#define __CORE_COMPATIABLE_H__ +/*! + * @file core_compatiable.h + * @brief ARM compatiable function definitions header file + */ +#ifdef __cplusplus + extern "C" { +#endif + +/* ===== ARM Compatiable Functions ===== */ +/** + * \defgroup NMSIS_Core_ARMCompatiable_Functions ARM Compatiable Functions + * \ingroup NMSIS_Core + * \brief A few functions that compatiable with ARM CMSIS-Core. + * \details + * + * Here we provided a few functions that compatiable with ARM CMSIS-Core, + * mostly used in the DSP and NN library. + * @{ + */ +/** \brief Instruction Synchronization Barrier, compatiable with ARM */ +#define __ISB() __RWMB() + +/** \brief Data Synchronization Barrier, compatiable with ARM */ +#define __DSB() __RWMB() + +/** \brief Data Memory Barrier, compatiable with ARM */ +#define __DMB() __RWMB() + +/** \brief LDRT Unprivileged (8 bit), ARM Compatiable */ +#define __LDRBT(ptr) __LB((ptr)) +/** \brief LDRT Unprivileged (16 bit), ARM Compatiable */ +#define __LDRHT(ptr) __LH((ptr)) +/** \brief LDRT Unprivileged (32 bit), ARM Compatiable */ +#define __LDRT(ptr) __LW((ptr)) + +/** \brief STRT Unprivileged (8 bit), ARM Compatiable */ +#define __STRBT(ptr) __SB((ptr)) +/** \brief STRT Unprivileged (16 bit), ARM Compatiable */ +#define __STRHT(ptr) __SH((ptr)) +/** \brief STRT Unprivileged (32 bit), ARM Compatiable */ +#define __STRT(ptr) __SW((ptr)) + +/* ===== Saturation Operations ===== */ +/** + * \brief Signed Saturate + * \details Saturates a signed value. + * \param [in] value Value to be saturated + * \param [in] sat Bit position to saturate to (1..32) + * \return Saturated value + */ +#if defined(__DSP_PRESENT) && (__DSP_PRESENT == 1) +#define __SSAT(val, sat) __RV_SCLIP32((val), (sat-1)) +#else +__STATIC_FORCEINLINE int32_t __SSAT(int32_t val, uint32_t sat) +{ + if ((sat >= 1U) && (sat <= 32U)) { + const int32_t max = (int32_t)((1U << (sat - 1U)) - 1U); + const int32_t min = -1 - max ; + if (val > max) { + return max; + } else if (val < min) { + return min; + } + } + return val; +} +#endif + +/** + * \brief Unsigned Saturate + * \details Saturates an unsigned value. + * \param [in] value Value to be saturated + * \param [in] sat Bit position to saturate to (0..31) + * \return Saturated value + */ +#if defined(__DSP_PRESENT) && (__DSP_PRESENT == 1) +#define __USAT(val, sat) __RV_UCLIP32((val), (sat-1)) +#else +__STATIC_FORCEINLINE uint32_t __USAT(int32_t val, uint32_t sat) +{ + if (sat <= 31U) { + const uint32_t max = ((1U << sat) - 1U); + if (val > (int32_t)max) { + return max; + } else if (val < 0) { + return 0U; + } + } + return (uint32_t)val; +} +#endif + +/* ===== Data Processing Operations ===== */ +/** + * \brief Reverse byte order (32 bit) + * \details Reverses the byte order in unsigned integer value. + * For example, 0x12345678 becomes 0x78563412. + * \param [in] value Value to reverse + * \return Reversed value + */ +__STATIC_FORCEINLINE uint32_t __REV(uint32_t value) +{ + uint32_t result; + + result = ((value & 0xff000000) >> 24) + | ((value & 0x00ff0000) >> 8 ) + | ((value & 0x0000ff00) << 8 ) + | ((value & 0x000000ff) << 24); + return result; +} + +/** + * \brief Reverse byte order (16 bit) + * \details Reverses the byte order within each halfword of a word. + * For example, 0x12345678 becomes 0x34127856. + * \param [in] value Value to reverse + * \return Reversed value + */ +__STATIC_FORCEINLINE uint32_t __REV16(uint32_t value) +{ + uint32_t result; + result = ((value & 0xff000000) >> 8) + | ((value & 0x00ff00000) << 8 ) + | ((value & 0x0000ff00) >> 8 ) + | ((value & 0x000000ff) << 8) ; + + return result; +} + +/** + * \brief Reverse byte order (16 bit) + * \details Reverses the byte order in a 16-bit value + * and returns the signed 16-bit result. + * For example, 0x0080 becomes 0x8000. + * \param [in] value Value to reverse + * \return Reversed value + */ +__STATIC_FORCEINLINE int16_t __REVSH(int16_t value) +{ + int16_t result; + result = ((value & 0xff00) >> 8) | ((value & 0x00ff) << 8); + return result; +} + +/** + * \brief Rotate Right in unsigned value (32 bit) + * \details Rotate Right (immediate) provides the value of + * the contents of a register rotated by a variable number of bits. + * \param [in] op1 Value to rotate + * \param [in] op2 Number of Bits to rotate(0-31) + * \return Rotated value + */ +__STATIC_FORCEINLINE uint32_t __ROR(uint32_t op1, uint32_t op2) +{ + op2 = op2 & 0x1F; + if (op2 == 0U) { + return op1; + } + return (op1 >> op2) | (op1 << (32U - op2)); +} + +/** + * \brief Reverse bit order of value + * \details Reverses the bit order of the given value. + * \param [in] value Value to reverse + * \return Reversed value + */ +#if defined(__DSP_PRESENT) && (__DSP_PRESENT == 1) +#define __RBIT(value) __RV_BITREVI((value), 31) +#else +__STATIC_FORCEINLINE uint32_t __RBIT(uint32_t value) +{ + uint32_t result; + uint32_t s = (4U /*sizeof(v)*/ * 8U) - 1U; /* extra shift needed at end */ + + result = value; /* r will be reversed bits of v; first get LSB of v */ + for (value >>= 1U; value != 0U; value >>= 1U) { + result <<= 1U; + result |= value & 1U; + s--; + } + result <<= s; /* shift when v's highest bits are zero */ + return result; +} +#endif /* defined(__DSP_PRESENT) && (__DSP_PRESENT == 1) */ + +/** + * \brief Count leading zeros + * \details Counts the number of leading zeros of a data value. + * \param [in] data Value to count the leading zeros + * \return number of leading zeros in value + */ +#if defined(__DSP_PRESENT) && (__DSP_PRESENT == 1) +#define __CLZ(data) __RV_CLZ32(data) +#else +__STATIC_FORCEINLINE uint8_t __CLZ(uint32_t data) +{ + uint8_t ret = 0; + uint32_t temp = ~data; + while (temp & 0x80000000) { + temp <<= 1; + ret++; + } + return ret; +} +#endif /* defined(__DSP_PRESENT) && (__DSP_PRESENT == 1) */ + +/** @} */ /* End of Doxygen Group NMSIS_Core_ARMCompatiable_Functions */ + +#ifdef __cplusplus +} +#endif +#endif /* __CORE_COMPATIABLE_H__ */ diff --git a/external/arch/riscv/nuclei/NMSIS/Core/Include/core_feature_base.h b/external/arch/riscv/nuclei/NMSIS/Core/Include/core_feature_base.h new file mode 100644 index 000000000..5f351a336 --- /dev/null +++ b/external/arch/riscv/nuclei/NMSIS/Core/Include/core_feature_base.h @@ -0,0 +1,1177 @@ +/* + * Copyright (c) 2019 Nuclei Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __CORE_FEATURE_BASE__ +#define __CORE_FEATURE_BASE__ +/*! + * @file core_feature_base.h + * @brief Base core feature API for Nuclei N/NX Core + */ +#include +#include "riscv_encoding.h" + +#ifdef __cplusplus + extern "C" { +#endif + +/** + * \defgroup NMSIS_Core_Registers Register Define and Type Definitions + * \brief Type definitions and defines for core registers. + * + * @{ + */ +#ifndef __RISCV_XLEN + /** \brief Refer to the width of an integer register in bits(either 32 or 64) */ + #ifndef __riscv_xlen + #define __RISCV_XLEN 32 + #else + #define __RISCV_XLEN __riscv_xlen + #endif +#endif /* __RISCV_XLEN */ + +/** \brief Type of Control and Status Register(CSR), depends on the XLEN defined in RISC-V */ +#if __RISCV_XLEN == 32 + typedef uint32_t rv_csr_t; +#elif __RISCV_XLEN == 64 + typedef uint64_t rv_csr_t; +#else + typedef uint32_t rv_csr_t; +#endif +/** @} */ /* End of Doxygen Group NMSIS_Core_Registers */ +/** + * \defgroup NMSIS_Core_Base_Registers Base Register Define and Type Definitions + * \ingroup NMSIS_Core_Registers + * \brief Type definitions and defines for base core registers. + * + * @{ + */ +/** + * \brief Union type to access MISA register. + */ +typedef union { + struct { + rv_csr_t a:1; /*!< bit: 0 Atomic extension */ + rv_csr_t b:1; /*!< bit: 1 Tentatively reserved for Bit-Manipulation extension */ + rv_csr_t c:1; /*!< bit: 2 Compressed extension */ + rv_csr_t d:1; /*!< bit: 3 Double-precision floating-point extension */ + rv_csr_t e:1; /*!< bit: 4 RV32E base ISA */ + rv_csr_t f:1; /*!< bit: 5 Single-precision floating-point extension */ + rv_csr_t g:1; /*!< bit: 6 Additional standard extensions present */ + rv_csr_t h:1; /*!< bit: 7 Hypervisor extension */ + rv_csr_t i:1; /*!< bit: 8 RV32I/64I/128I base ISA */ + rv_csr_t j:1; /*!< bit: 9 Tentatively reserved for Dynamically Translated Languages extension */ + rv_csr_t _reserved1:1; /*!< bit: 10 Reserved */ + rv_csr_t l:1; /*!< bit: 11 Tentatively reserved for Decimal Floating-Point extension */ + rv_csr_t m:1; /*!< bit: 12 Integer Multiply/Divide extension */ + rv_csr_t n:1; /*!< bit: 13 User-level interrupts supported */ + rv_csr_t _reserved2:1; /*!< bit: 14 Reserved */ + rv_csr_t p:1; /*!< bit: 15 Tentatively reserved for Packed-SIMD extension */ + rv_csr_t q:1; /*!< bit: 16 Quad-precision floating-point extension */ + rv_csr_t _resreved3:1; /*!< bit: 17 Reserved */ + rv_csr_t s:1; /*!< bit: 18 Supervisor mode implemented */ + rv_csr_t t:1; /*!< bit: 19 Tentatively reserved for Transactional Memory extension */ + rv_csr_t u:1; /*!< bit: 20 User mode implemented */ + rv_csr_t v:1; /*!< bit: 21 Tentatively reserved for Vector extension */ + rv_csr_t _reserved4:1; /*!< bit: 22 Reserved */ + rv_csr_t x:1; /*!< bit: 23 Non-standard extensions present */ +#if defined(__RISCV_XLEN) && __RISCV_XLEN == 64 + rv_csr_t _reserved5:38; /*!< bit: 24..61 Reserved */ + rv_csr_t mxl:2; /*!< bit: 62..63 Machine XLEN */ +#else + rv_csr_t _reserved5:6; /*!< bit: 24..29 Reserved */ + rv_csr_t mxl:2; /*!< bit: 30..31 Machine XLEN */ +#endif + } b; /*!< Structure used for bit access */ + rv_csr_t d; /*!< Type used for csr data access */ +} CSR_MISA_Type; + +/** + * \brief Union type to access MSTATUS configure register. + */ +typedef union { + struct { +#if defined(__RISCV_XLEN) && __RISCV_XLEN == 64 + rv_csr_t _reserved0:3; /*!< bit: 0..2 Reserved */ + rv_csr_t mie:1; /*!< bit: 3 Machine mode interrupt enable flag */ + rv_csr_t _reserved1:3; /*!< bit: 4..6 Reserved */ + rv_csr_t mpie:1; /*!< bit: 7 mirror of MIE flag */ + rv_csr_t _reserved2:3; /*!< bit: 8..10 Reserved */ + rv_csr_t mpp:2; /*!< bit: 11..12 mirror of Privilege Mode */ + rv_csr_t fs:2; /*!< bit: 13..14 FS status flag */ + rv_csr_t xs:2; /*!< bit: 15..16 XS status flag */ + rv_csr_t mprv:1; /*!< bit: Machine mode PMP */ + rv_csr_t _reserved3:14; /*!< bit: 18..31 Reserved */ + rv_csr_t uxl:2; /*!< bit: 32..33 user mode xlen */ + rv_csr_t _reserved6:29; /*!< bit: 34..62 Reserved */ + rv_csr_t sd:1; /*!< bit: Dirty status for XS or FS */ +#else + rv_csr_t _reserved0:1; /*!< bit: 0 Reserved */ + rv_csr_t sie:1; /*!< bit: 1 supervisor interrupt enable flag */ + rv_csr_t _reserved1:1; /*!< bit: 2 Reserved */ + rv_csr_t mie:1; /*!< bit: 3 Machine mode interrupt enable flag */ + rv_csr_t _reserved2:1; /*!< bit: 4 Reserved */ + rv_csr_t spie:1; /*!< bit: 3 Supervisor Privilede mode interrupt enable flag */ + rv_csr_t _reserved3:1; /*!< bit: Reserved */ + rv_csr_t mpie:1; /*!< bit: mirror of MIE flag */ + rv_csr_t _reserved4:3; /*!< bit: Reserved */ + rv_csr_t mpp:2; /*!< bit: mirror of Privilege Mode */ + rv_csr_t fs:2; /*!< bit: FS status flag */ + rv_csr_t xs:2; /*!< bit: XS status flag */ + rv_csr_t mprv:1; /*!< bit: Machine mode PMP */ + rv_csr_t sum:1; /*!< bit: Supervisor Mode load and store protection */ + rv_csr_t _reserved6:12; /*!< bit: 19..30 Reserved */ + rv_csr_t sd:1; /*!< bit: Dirty status for XS or FS */ +#endif + } b; /*!< Structure used for bit access */ + rv_csr_t d; /*!< Type used for csr data access */ +} CSR_MSTATUS_Type; + +/** + * \brief Union type to access MTVEC configure register. + */ +typedef union { + struct { + rv_csr_t mode:6; /*!< bit: 0..5 interrupt mode control */ +#if defined(__RISCV_XLEN) && __RISCV_XLEN == 64 + rv_csr_t addr:58; /*!< bit: 6..63 mtvec address */ +#else + rv_csr_t addr:26; /*!< bit: 6..31 mtvec address */ +#endif + } b; /*!< Structure used for bit access */ + rv_csr_t d; /*!< Type used for csr data access */ +} CSR_MTVEC_Type; + +/** + * \brief Union type to access MCAUSE configure register. + */ +typedef union { + struct { + rv_csr_t exccode:12; /*!< bit: 11..0 exception or interrupt code */ + rv_csr_t _reserved0:4; /*!< bit: 15..12 Reserved */ + rv_csr_t mpil:8; /*!< bit: 23..16 Previous interrupt level */ + rv_csr_t _reserved1:3; /*!< bit: 26..24 Reserved */ + rv_csr_t mpie:1; /*!< bit: 27 Interrupt enable flag before enter interrupt */ + rv_csr_t mpp:2; /*!< bit: 29..28 Privilede mode flag before enter interrupt */ + rv_csr_t minhv:1; /*!< bit: 30 Machine interrupt vector table */ +#if defined(__RISCV_XLEN) && __RISCV_XLEN == 64 + rv_csr_t _reserved2:32; /*!< bit: 31..62 Reserved */ + rv_csr_t interrupt:1; /*!< bit: 63 trap type. 0 means exception and 1 means interrupt */ +#else + rv_csr_t interrupt:1; /*!< bit: 31 trap type. 0 means exception and 1 means interrupt */ +#endif + } b; /*!< Structure used for bit access */ + rv_csr_t d; /*!< Type used for csr data access */ +} CSR_MCAUSE_Type; + +/** + * \brief Union type to access MCOUNTINHIBIT configure register. + */ +typedef union { + struct { + rv_csr_t cy:1; /*!< bit: 0 1 means disable mcycle counter */ + rv_csr_t _reserved0:1; /*!< bit: 1 Reserved */ + rv_csr_t ir:1; /*!< bit: 2 1 means disable minstret counter */ +#if defined(__RISCV_XLEN) && __RISCV_XLEN == 64 + rv_csr_t _reserved1:61; /*!< bit: 3..63 Reserved */ +#else + rv_csr_t _reserved1:29; /*!< bit: 3..31 Reserved */ +#endif + } b; /*!< Structure used for bit access */ + rv_csr_t d; /*!< Type used for csr data access */ +} CSR_MCOUNTINHIBIT_Type; + +/** + * \brief Union type to access msubm configure register. + */ +typedef union { + struct { + rv_csr_t _reserved0:6; /*!< bit: 0..5 Reserved */ + rv_csr_t typ:2; /*!< bit: 6..7 current trap type */ + rv_csr_t ptyp:2; /*!< bit: 8..9 previous trap type */ +#if defined(__RISCV_XLEN) && __RISCV_XLEN == 64 + rv_csr_t _reserved1:54; /*!< bit: 10..63 Reserved */ +#else + rv_csr_t _reserved1:22; /*!< bit: 10..31 Reserved */ +#endif + } b; /*!< Structure used for bit access */ + rv_csr_t d; /*!< Type used for csr data access */ +} CSR_MSUBM_Type; + +/** + * \brief Union type to access MMISC_CTRL configure register. + */ +typedef union { + struct { + rv_csr_t _reserved0:3; /*!< bit: 0..2 Reserved */ + rv_csr_t bpu:1; /*!< bit: 3 dynamic prediction enable flag */ + rv_csr_t _reserved1:2; /*!< bit: 4..5 Reserved */ + rv_csr_t misalign:1; /*!< bit: 6 misaligned access support flag */ + rv_csr_t _reserved2:2; /*!< bit: 7..8 Reserved */ + rv_csr_t nmi_cause:1; /*!< bit: 9 mnvec control and nmi mcase exccode */ +#if defined(__RISCV_XLEN) && __RISCV_XLEN == 64 + rv_csr_t _reserved3:54; /*!< bit: 10..63 Reserved */ +#else + rv_csr_t _reserved3:22; /*!< bit: 10..31 Reserved */ +#endif + } b; /*!< Structure used for bit access */ + rv_csr_t d; /*!< Type used for csr data access */ +} CSR_MMISCCTRL_Type; + + +/** + * \brief Union type to access MSAVESTATUS configure register. + */ +typedef union { + struct { + rv_csr_t mpie1:1; /*!< bit: 0 interrupt enable flag of fisrt level NMI/exception nestting */ + rv_csr_t mpp1:2; /*!< bit: 1..2 privilede mode of fisrt level NMI/exception nestting */ + rv_csr_t _reserved0:3; /*!< bit: 3..5 Reserved */ + rv_csr_t ptyp1:2; /*!< bit: 6..7 NMI/exception type of before first nestting */ + rv_csr_t mpie2:1; /*!< bit: 8 interrupt enable flag of second level NMI/exception nestting */ + rv_csr_t mpp2:2; /*!< bit: 9..10 privilede mode of second level NMI/exception nestting */ + rv_csr_t _reserved1:3; /*!< bit: 11..13 Reserved */ + rv_csr_t ptyp2:2; /*!< bit: 14..15 NMI/exception type of before second nestting */ +#if defined(__RISCV_XLEN) && __RISCV_XLEN == 64 + rv_csr_t _reserved2:48; /*!< bit: 16..63 Reserved*/ +#else + rv_csr_t _reserved2:16; /*!< bit: 16..31 Reserved*/ +#endif + } b; /*!< Structure used for bit access */ + rv_csr_t w; /*!< Type used for csr data access */ +} CSR_MSAVESTATUS_Type; +/** @} */ /* End of Doxygen Group NMSIS_Core_Base_Registers */ + +/* ########################### Core Function Access ########################### */ +/** + * \defgroup NMSIS_Core_CSR_Register_Access Core CSR Register Access + * \ingroup NMSIS_Core + * \brief Functions to access the Core CSR Registers + * \details + * + * The following functions or macros provide access to Core CSR registers. + * - \ref NMSIS_Core_CSR_Encoding + * - \ref NMSIS_Core_CSR_Registers + * @{ + */ + + +#ifndef __ASSEMBLY__ + +/** + * \brief CSR operation Macro for csrrw instruction. + * \details + * Read the content of csr register to __v, + * then write content of val into csr register, then return __v + * \param csr CSR macro definition defined in + * \ref NMSIS_Core_CSR_Registers, eg. \ref CSR_MSTATUS + * \param val value to store into the CSR register + * \return the CSR register value before written + */ +#define __RV_CSR_SWAP(csr, val) \ + ({ \ + register rv_csr_t __v = (unsigned long)(val); \ + __ASM volatile("csrrw %0, " STRINGIFY(csr) ", %1" \ + : "=r"(__v) \ + : "rK"(__v) \ + : "memory"); \ + __v; \ + }) + +/** + * \brief CSR operation Macro for csrr instruction. + * \details + * Read the content of csr register to __v and return it + * \param csr CSR macro definition defined in + * \ref NMSIS_Core_CSR_Registers, eg. \ref CSR_MSTATUS + * \return the CSR register value + */ +#define __RV_CSR_READ(csr) \ + ({ \ + register rv_csr_t __v; \ + __ASM volatile("csrr %0, " STRINGIFY(csr) \ + : "=r"(__v) \ + : \ + : "memory"); \ + __v; \ + }) + +/** + * \brief CSR operation Macro for csrw instruction. + * \details + * Write the content of val to csr register + * \param csr CSR macro definition defined in + * \ref NMSIS_Core_CSR_Registers, eg. \ref CSR_MSTATUS + * \param val value to store into the CSR register + */ +#define __RV_CSR_WRITE(csr, val) \ + ({ \ + register rv_csr_t __v = (rv_csr_t)(val); \ + __ASM volatile("csrw " STRINGIFY(csr) ", %0" \ + : \ + : "rK"(__v) \ + : "memory"); \ + }) + +/** + * \brief CSR operation Macro for csrrs instruction. + * \details + * Read the content of csr register to __v, + * then set csr register to be __v | val, then return __v + * \param csr CSR macro definition defined in + * \ref NMSIS_Core_CSR_Registers, eg. \ref CSR_MSTATUS + * \param val Mask value to be used wih csrrs instruction + * \return the CSR register value before written + */ +#define __RV_CSR_READ_SET(csr, val) \ + ({ \ + register rv_csr_t __v = (rv_csr_t)(val); \ + __ASM volatile("csrrs %0, " STRINGIFY(csr) ", %1" \ + : "=r"(__v) \ + : "rK"(__v) \ + : "memory"); \ + __v; \ + }) + +/** + * \brief CSR operation Macro for csrs instruction. + * \details + * Set csr register to be csr_content | val + * \param csr CSR macro definition defined in + * \ref NMSIS_Core_CSR_Registers, eg. \ref CSR_MSTATUS + * \param val Mask value to be used wih csrs instruction + */ +#define __RV_CSR_SET(csr, val) \ + ({ \ + register rv_csr_t __v = (rv_csr_t)(val); \ + __ASM volatile("csrs " STRINGIFY(csr) ", %0" \ + : \ + : "rK"(__v) \ + : "memory"); \ + }) + +/** + * \brief CSR operation Macro for csrrc instruction. + * \details + * Read the content of csr register to __v, + * then set csr register to be __v & ~val, then return __v + * \param csr CSR macro definition defined in + * \ref NMSIS_Core_CSR_Registers, eg. \ref CSR_MSTATUS + * \param val Mask value to be used wih csrrc instruction + * \return the CSR register value before written + */ +#define __RV_CSR_READ_CLEAR(csr, val) \ + ({ \ + register rv_csr_t __v = (rv_csr_t)(val); \ + __ASM volatile("csrrc %0, " STRINGIFY(csr) ", %1" \ + : "=r"(__v) \ + : "rK"(__v) \ + : "memory"); \ + __v; \ + }) + +/** + * \brief CSR operation Macro for csrc instruction. + * \details + * Set csr register to be csr_content & ~val + * \param csr CSR macro definition defined in + * \ref NMSIS_Core_CSR_Registers, eg. \ref CSR_MSTATUS + * \param val Mask value to be used wih csrc instruction + */ +#define __RV_CSR_CLEAR(csr, val) \ + ({ \ + register rv_csr_t __v = (rv_csr_t)(val); \ + __ASM volatile("csrc " STRINGIFY(csr) ", %0" \ + : \ + : "rK"(__v) \ + : "memory"); \ + }) +#endif /* __ASSEMBLY__ */ + +/** + * \brief Enable IRQ Interrupts + * \details Enables IRQ interrupts by setting the MIE-bit in the MSTATUS Register. + * \remarks + * Can only be executed in Privileged modes. + */ +__STATIC_FORCEINLINE void __enable_irq(void) +{ + __RV_CSR_SET(CSR_MSTATUS, MSTATUS_MIE); +} + +/** + * \brief Disable IRQ Interrupts + * \details Disables IRQ interrupts by clearing the MIE-bit in the MSTATUS Register. + * \remarks + * Can only be executed in Privileged modes. + */ +__STATIC_FORCEINLINE void __disable_irq(void) +{ + __RV_CSR_CLEAR(CSR_MSTATUS, MSTATUS_MIE); +} + +/** + * \brief Read whole 64 bits value of mcycle counter + * \details This function will read the whole 64 bits of MCYCLE register + * \return The whole 64 bits value of MCYCLE + * \remarks It will work for both RV32 and RV64 to get full 64bits value of MCYCLE + */ +__STATIC_FORCEINLINE uint64_t __get_rv_cycle(void) +{ +#if __RISCV_XLEN == 32 + volatile uint32_t high0, low, high; + uint64_t full; + + high0 = __RV_CSR_READ(CSR_MCYCLEH); + low = __RV_CSR_READ(CSR_MCYCLE); + high = __RV_CSR_READ(CSR_MCYCLEH); + if (high0 != high) { + low = __RV_CSR_READ(CSR_MCYCLE); + } + full = (((uint64_t)high) << 32) | low; + return full; +#elif __RISCV_XLEN == 64 + return (uint64_t)__RV_CSR_READ(CSR_MCYCLE); +#else // TODO Need cover for XLEN=128 case in future + return (uint64_t)__RV_CSR_READ(CSR_MCYCLE); +#endif +} + +/** + * \brief Read whole 64 bits value of machine instruction-retired counter + * \details This function will read the whole 64 bits of MINSTRET register + * \return The whole 64 bits value of MINSTRET + * \remarks It will work for both RV32 and RV64 to get full 64bits value of MINSTRET + */ +__STATIC_FORCEINLINE uint64_t __get_rv_instret(void) +{ +#if __RISCV_XLEN == 32 + volatile uint32_t high0, low, high; + uint64_t full; + + high0 = __RV_CSR_READ(CSR_MINSTRETH); + low = __RV_CSR_READ(CSR_MINSTRET); + high = __RV_CSR_READ(CSR_MINSTRETH); + if (high0 != high) { + low = __RV_CSR_READ(CSR_MINSTRET); + } + full = (((uint64_t)high) << 32) | low; + return full; +#elif __RISCV_XLEN == 64 + return (uint64_t)__RV_CSR_READ(CSR_MINSTRET); +#else // TODO Need cover for XLEN=128 case in future + return (uint64_t)__RV_CSR_READ(CSR_MINSTRET); +#endif +} + +/** + * \brief Read whole 64 bits value of real-time clock + * \details This function will read the whole 64 bits of TIME register + * \return The whole 64 bits value of TIME CSR + * \remarks It will work for both RV32 and RV64 to get full 64bits value of TIME + * \attention only available when user mode available + */ +__STATIC_FORCEINLINE uint64_t __get_rv_time(void) +{ +#if __RISCV_XLEN == 32 + volatile uint32_t high0, low, high; + uint64_t full; + + high0 = __RV_CSR_READ(CSR_TIMEH); + low = __RV_CSR_READ(CSR_TIME); + high = __RV_CSR_READ(CSR_TIMEH); + if (high0 != high) { + low = __RV_CSR_READ(CSR_TIME); + } + full = (((uint64_t)high) << 32) | low; + return full; +#elif __RISCV_XLEN == 64 + return (uint64_t)__RV_CSR_READ(CSR_TIME); +#else // TODO Need cover for XLEN=128 case in future + return (uint64_t)__RV_CSR_READ(CSR_TIME); +#endif +} + +/** @} */ /* End of Doxygen Group NMSIS_Core_CSR_Register_Access */ + +/* ########################### CPU Intrinsic Functions ########################### */ +/** + * \defgroup NMSIS_Core_CPU_Intrinsic Intrinsic Functions for CPU Intructions + * \ingroup NMSIS_Core + * \brief Functions that generate RISC-V CPU instructions. + * \details + * + * The following functions generate specified RISC-V instructions that cannot be directly accessed by compiler. + * @{ + */ + +/** + * \brief NOP Instruction + * \details + * No Operation does nothing. + * This instruction can be used for code alignment purposes. + */ +__STATIC_FORCEINLINE void __NOP(void) +{ + __ASM volatile("nop"); +} + +/** + * \brief Wait For Interrupt + * \details + * Wait For Interrupt is is executed using CSR_WFE.WFE=0 and WFI instruction. + * It will suspends execution until interrupt, NMI or Debug happened. + * When Core is waked up by interrupt, if + * 1. mstatus.MIE == 1(interrupt enabled), Core will enter ISR code + * 2. mstatus.MIE == 0(interrupt disabled), Core will resume previous execution + */ +__STATIC_FORCEINLINE void __WFI(void) +{ + __RV_CSR_CLEAR(CSR_WFE, WFE_WFE); + __ASM volatile("wfi"); +} + +/** + * \brief Wait For Event + * \details + * Wait For Event is executed using CSR_WFE.WFE=1 and WFI instruction. + * It will suspends execution until event, NMI or Debug happened. + * When Core is waked up, Core will resume previous execution + */ +__STATIC_FORCEINLINE void __WFE(void) +{ + __RV_CSR_SET(CSR_WFE, WFE_WFE); + __ASM volatile("wfi"); + __RV_CSR_CLEAR(CSR_WFE, WFE_WFE); +} + +/** + * \brief Breakpoint Instruction + * \details + * Causes the processor to enter Debug state. + * Debug tools can use this to investigate system state + * when the instruction at a particular address is reached. + */ +__STATIC_FORCEINLINE void __EBREAK(void) +{ + __ASM volatile("ebreak"); +} + +/** + * \brief Environment Call Instruction + * \details + * The ECALL instruction is used to make a service request to + * the execution environment. + */ +__STATIC_FORCEINLINE void __ECALL(void) +{ + __ASM volatile("ecall"); +} + +/** + * \brief WFI Sleep Mode enumeration + */ +typedef enum WFI_SleepMode { + WFI_SHALLOW_SLEEP = 0, /*!< Shallow sleep mode, the core_clk will poweroff */ + WFI_DEEP_SLEEP = 1 /*!< Deep sleep mode, the core_clk and core_ano_clk will poweroff */ +} WFI_SleepMode_Type; + +/** + * \brief Set Sleep mode of WFI + * \details + * Set the SLEEPVALUE CSR register to control the + * WFI Sleep mode. + * \param[in] mode The sleep mode to be set + */ +__STATIC_FORCEINLINE void __set_wfi_sleepmode(WFI_SleepMode_Type mode) +{ + __RV_CSR_WRITE(CSR_SLEEPVALUE, mode); +} + +/** + * \brief Send TX Event + * \details + * Set the CSR TXEVT to control send a TX Event. + * The Core will output signal tx_evt as output event signal. + */ +__STATIC_FORCEINLINE void __TXEVT(void) +{ + __RV_CSR_SET(CSR_TXEVT, 0x1); +} + +/** + * \brief Enable MCYCLE counter + * \details + * Clear the CY bit of MCOUNTINHIBIT to 0 to enable MCYCLE Counter + */ +__STATIC_FORCEINLINE void __enable_mcycle_counter(void) +{ + __RV_CSR_CLEAR(CSR_MCOUNTINHIBIT, MCOUNTINHIBIT_CY); +} + +/** + * \brief Disable MCYCLE counter + * \details + * Set the CY bit of MCOUNTINHIBIT to 1 to disable MCYCLE Counter + */ +__STATIC_FORCEINLINE void __disable_mcycle_counter(void) +{ + __RV_CSR_SET(CSR_MCOUNTINHIBIT, MCOUNTINHIBIT_CY); +} + +/** + * \brief Enable MINSTRET counter + * \details + * Clear the IR bit of MCOUNTINHIBIT to 0 to enable MINSTRET Counter + */ +__STATIC_FORCEINLINE void __enable_minstret_counter(void) +{ + __RV_CSR_CLEAR(CSR_MCOUNTINHIBIT, MCOUNTINHIBIT_IR); +} + +/** + * \brief Disable MINSTRET counter + * \details + * Set the IR bit of MCOUNTINHIBIT to 1 to disable MINSTRET Counter + */ +__STATIC_FORCEINLINE void __disable_minstret_counter(void) +{ + __RV_CSR_SET(CSR_MCOUNTINHIBIT, MCOUNTINHIBIT_IR); +} + +/** + * \brief Enable MCYCLE & MINSTRET counter + * \details + * Clear the IR and CY bit of MCOUNTINHIBIT to 1 to enable MINSTRET & MCYCLE Counter + */ +__STATIC_FORCEINLINE void __enable_all_counter(void) +{ + __RV_CSR_CLEAR(CSR_MCOUNTINHIBIT, MCOUNTINHIBIT_IR|MCOUNTINHIBIT_CY); +} + +/** + * \brief Disable MCYCLE & MINSTRET counter + * \details + * Set the IR and CY bit of MCOUNTINHIBIT to 1 to disable MINSTRET & MCYCLE Counter + */ +__STATIC_FORCEINLINE void __disable_all_counter(void) +{ + __RV_CSR_SET(CSR_MCOUNTINHIBIT, MCOUNTINHIBIT_IR|MCOUNTINHIBIT_CY); +} + +/** + * \brief Execute fence instruction, p -> pred, s -> succ + * \details + * the FENCE instruction ensures that all memory accesses from instructions preceding + * the fence in program order (the `predecessor set`) appear earlier in the global memory order than + * memory accesses from instructions appearing after the fence in program order (the `successor set`). + * For details, please refer to The RISC-V Instruction Set Manual + * \param p predecessor set, such as iorw, rw, r, w + * \param s successor set, such as iorw, rw, r, w + **/ +#define __FENCE(p, s) __ASM volatile ("fence " #p "," #s : : : "memory") + +/** + * \brief Fence.i Instruction + * \details + * The FENCE.I instruction is used to synchronize the instruction + * and data streams. + */ +__STATIC_FORCEINLINE void __FENCE_I(void) +{ + __ASM volatile("fence.i"); +} + +/** \brief Read & Write Memory barrier */ +#define __RWMB() __FENCE(iorw,iorw) + +/** \brief Read Memory barrier */ +#define __RMB() __FENCE(ir,ir) + +/** \brief Write Memory barrier */ +#define __WMB() __FENCE(ow,ow) + +/** \brief SMP Read & Write Memory barrier */ +#define __SMP_RWMB() __FENCE(rw,rw) + +/** \brief SMP Read Memory barrier */ +#define __SMP_RMB() __FENCE(r,r) + +/** \brief SMP Write Memory barrier */ +#define __SMP_WMB() __FENCE(w,w) + +/** \brief CPU relax for busy loop */ +#define __CPU_RELAX() __ASM volatile ("" : : : "memory") + + +/* ===== Load/Store Operations ===== */ +/** + * \brief Load 8bit value from address (8 bit) + * \details Load 8 bit value. + * \param [in] addr Address pointer to data + * \return value of type uint8_t at (*addr) + */ +__STATIC_FORCEINLINE uint8_t __LB(volatile void *addr) +{ + uint8_t result; + + __ASM volatile ("lb %0, 0(%1)" : "=r" (result) : "r" (addr)); + return result; +} + +/** + * \brief Load 16bit value from address (16 bit) + * \details Load 16 bit value. + * \param [in] addr Address pointer to data + * \return value of type uint16_t at (*addr) + */ +__STATIC_FORCEINLINE uint16_t __LH(volatile void *addr) +{ + uint16_t result; + + __ASM volatile ("lh %0, 0(%1)" : "=r" (result) : "r" (addr)); + return result; +} + +/** + * \brief Load 32bit value from address (32 bit) + * \details Load 32 bit value. + * \param [in] addr Address pointer to data + * \return value of type uint32_t at (*addr) + */ +__STATIC_FORCEINLINE uint32_t __LW(volatile void *addr) +{ + uint32_t result; + + __ASM volatile ("lw %0, 0(%1)" : "=r" (result) : "r" (addr)); + return result; +} + +#if __RISCV_XLEN != 32 +/** + * \brief Load 64bit value from address (64 bit) + * \details Load 64 bit value. + * \param [in] addr Address pointer to data + * \return value of type uint64_t at (*addr) + * \remarks RV64 only macro + */ +__STATIC_FORCEINLINE uint64_t __LD(volatile void *addr) +{ + uint64_t result; + __ASM volatile ("ld %0, 0(%1)" : "=r" (result) : "r" (addr)); + return result; +} +#endif + +/** + * \brief Write 8bit value to address (8 bit) + * \details Write 8 bit value. + * \param [in] addr Address pointer to data + * \param [in] val Value to set + */ +__STATIC_FORCEINLINE void __SB(volatile void *addr, uint8_t val) +{ + __ASM volatile ("sb %0, 0(%1)" : : "r" (val), "r" (addr)); +} + +/** + * \brief Write 16bit value to address (16 bit) + * \details Write 16 bit value. + * \param [in] addr Address pointer to data + * \param [in] val Value to set + */ +__STATIC_FORCEINLINE void __SH(volatile void *addr, uint16_t val) +{ + __ASM volatile ("sh %0, 0(%1)" : : "r" (val), "r" (addr)); +} + +/** + * \brief Write 32bit value to address (32 bit) + * \details Write 32 bit value. + * \param [in] addr Address pointer to data + * \param [in] val Value to set + */ +__STATIC_FORCEINLINE void __SW(volatile void *addr, uint32_t val) +{ + __ASM volatile ("sw %0, 0(%1)" : : "r" (val), "r" (addr)); +} + +#if __RISCV_XLEN != 32 +/** + * \brief Write 64bit value to address (64 bit) + * \details Write 64 bit value. + * \param [in] addr Address pointer to data + * \param [in] val Value to set + */ +__STATIC_FORCEINLINE void __SD(volatile void *addr, uint64_t val) +{ + __ASM volatile ("sd %0, 0(%1)" : : "r" (val), "r" (addr)); +} +#endif + +/** + * \brief Compare and Swap 32bit value using LR and SC + * \details Compare old value with memory, if identical, + * store new value in memory. Return the initial value in memory. + * Success is indicated by comparing return value with OLD. + * memory address, return 0 if successful, otherwise return !0 + * \param [in] addr Address pointer to data, address need to be 4byte aligned + * \param [in] oldval Old value of the data in address + * \param [in] newval New value to be stored into the address + * \return return the initial value in memory + */ +__STATIC_FORCEINLINE uint32_t __CAS_W(volatile uint32_t *addr, uint32_t oldval, uint32_t newval) +{ + register uint32_t result; + register uint32_t rc; + + __ASM volatile ( \ + "0: lr.w %0, %2 \n" \ + " bne %0, %z3, 1f \n" \ + " sc.w %1, %z4, %2 \n" \ + " bnez %1, 0b \n" \ + "1:\n" \ + : "=&r"(result), "=&r"(rc), "+A"(*addr) \ + : "r"(oldval), "r"(newval) \ + : "memory"); + return result; +} + +/** + * \brief Atomic Swap 32bit value into memory + * \details Atomically swap new 32bit value into memory using amoswap.d. + * \param [in] addr Address pointer to data, address need to be 4byte aligned + * \param [in] newval New value to be stored into the address + * \return return the original value in memory + */ +__STATIC_FORCEINLINE uint32_t __AMOSWAP_W(volatile uint32_t *addr, uint32_t newval) +{ + register uint32_t result; + + __ASM volatile ("amoswap.w %0, %2, %1" : \ + "=r"(result), "+A"(*addr) : "r"(newval) : "memory"); + return result; +} + +/** + * \brief Atomic Add with 32bit value + * \details Atomically ADD 32bit value with value in memory using amoadd.d. + * \param [in] addr Address pointer to data, address need to be 4byte aligned + * \param [in] value value to be ADDed + * \return return memory value + add value + */ +__STATIC_FORCEINLINE int32_t __AMOADD_W(volatile int32_t *addr, int32_t value) +{ + register int32_t result; + + __ASM volatile ("amoadd.w %0, %2, %1" : \ + "=r"(result), "+A"(*addr) : "r"(value) : "memory"); + return *addr; +} + +/** + * \brief Atomic And with 32bit value + * \details Atomically AND 32bit value with value in memory using amoand.d. + * \param [in] addr Address pointer to data, address need to be 4byte aligned + * \param [in] value value to be ANDed + * \return return memory value & and value + */ +__STATIC_FORCEINLINE int32_t __AMOAND_W(volatile int32_t *addr, int32_t value) +{ + register int32_t result; + + __ASM volatile ("amoand.w %0, %2, %1" : \ + "=r"(result), "+A"(*addr) : "r"(value) : "memory"); + return *addr; +} + +/** + * \brief Atomic OR with 32bit value + * \details Atomically OR 32bit value with value in memory using amoor.d. + * \param [in] addr Address pointer to data, address need to be 4byte aligned + * \param [in] value value to be ORed + * \return return memory value | and value + */ +__STATIC_FORCEINLINE int32_t __AMOOR_W(volatile int32_t *addr, int32_t value) +{ + register int32_t result; + + __ASM volatile ("amoor.w %0, %2, %1" : \ + "=r"(result), "+A"(*addr) : "r"(value) : "memory"); + return *addr; +} + +/** + * \brief Atomic XOR with 32bit value + * \details Atomically XOR 32bit value with value in memory using amoxor.d. + * \param [in] addr Address pointer to data, address need to be 4byte aligned + * \param [in] value value to be XORed + * \return return memory value ^ and value + */ +__STATIC_FORCEINLINE int32_t __AMOXOR_W(volatile int32_t *addr, int32_t value) +{ + register int32_t result; + + __ASM volatile ("amoxor.w %0, %2, %1" : \ + "=r"(result), "+A"(*addr) : "r"(value) : "memory"); + return *addr; +} + +/** + * \brief Atomic unsigned MAX with 32bit value + * \details Atomically unsigned max compare 32bit value with value in memory using amomaxu.d. + * \param [in] addr Address pointer to data, address need to be 4byte aligned + * \param [in] value value to be compared + * \return return the bigger value + */ +__STATIC_FORCEINLINE uint32_t __AMOMAXU_W(volatile uint32_t *addr, uint32_t value) +{ + register uint32_t result; + + __ASM volatile ("amomaxu.w %0, %2, %1" : \ + "=r"(result), "+A"(*addr) : "r"(value) : "memory"); + return *addr; +} + +/** + * \brief Atomic signed MAX with 32bit value + * \details Atomically signed max compare 32bit value with value in memory using amomax.d. + * \param [in] addr Address pointer to data, address need to be 4byte aligned + * \param [in] value value to be compared + * \return the bigger value + */ +__STATIC_FORCEINLINE int32_t __AMOMAX_W(volatile int32_t *addr, int32_t value) +{ + register int32_t result; + + __ASM volatile ("amomax.w %0, %2, %1" : \ + "=r"(result), "+A"(*addr) : "r"(value) : "memory"); + return *addr; +} + +/** + * \brief Atomic unsigned MIN with 32bit value + * \details Atomically unsigned min compare 32bit value with value in memory using amominu.d. + * \param [in] addr Address pointer to data, address need to be 4byte aligned + * \param [in] value value to be compared + * \return the smaller value + */ +__STATIC_FORCEINLINE uint32_t __AMOMINU_W(volatile uint32_t *addr, uint32_t value) +{ + register uint32_t result; + + __ASM volatile ("amominu.w %0, %2, %1" : \ + "=r"(result), "+A"(*addr) : "r"(value) : "memory"); + return *addr; +} + +/** + * \brief Atomic signed MIN with 32bit value + * \details Atomically signed min compare 32bit value with value in memory using amomin.d. + * \param [in] addr Address pointer to data, address need to be 4byte aligned + * \param [in] value value to be compared + * \return the smaller value + */ +__STATIC_FORCEINLINE int32_t __AMOMIN_W(volatile int32_t *addr, int32_t value) +{ + register int32_t result; + + __ASM volatile ("amomin.w %0, %2, %1" : \ + "=r"(result), "+A"(*addr) : "r"(value) : "memory"); + return *addr; +} + +#if __RISCV_XLEN == 64 +/** + * \brief Compare and Swap 64bit value using LR and SC + * \details Compare old value with memory, if identical, + * store new value in memory. Return the initial value in memory. + * Success is indicated by comparing return value with OLD. + * memory address, return 0 if successful, otherwise return !0 + * \param [in] addr Address pointer to data, address need to be 8byte aligned + * \param [in] oldval Old value of the data in address + * \param [in] newval New value to be stored into the address + * \return return the initial value in memory + */ +__STATIC_FORCEINLINE uint64_t __CAS_D(volatile uint64_t *addr, uint64_t oldval, uint64_t newval) +{ + register uint64_t result; + register uint64_t rc; + + __ASM volatile ( \ + "0: lr.d %0, %2 \n" \ + " bne %0, %z3, 1f \n" \ + " sc.d %1, %z4, %2 \n" \ + " bnez %1, 0b \n" \ + "1:\n" \ + : "=&r"(result), "=&r"(rc), "+A"(*addr) \ + : "r"(oldval), "r"(newval) \ + : "memory"); + return result; +} + +/** + * \brief Atomic Swap 64bit value into memory + * \details Atomically swap new 64bit value into memory using amoswap.d. + * \param [in] addr Address pointer to data, address need to be 8byte aligned + * \param [in] newval New value to be stored into the address + * \return return the original value in memory + */ +__STATIC_FORCEINLINE uint64_t __AMOSWAP_D(volatile uint64_t *addr, uint64_t newval) +{ + register uint64_t result; + + __ASM volatile ("amoswap.d %0, %2, %1" : \ + "=r"(result), "+A"(*addr) : "r"(newval) : "memory"); + return result; +} + +/** + * \brief Atomic Add with 64bit value + * \details Atomically ADD 64bit value with value in memory using amoadd.d. + * \param [in] addr Address pointer to data, address need to be 8byte aligned + * \param [in] value value to be ADDed + * \return return memory value + add value + */ +__STATIC_FORCEINLINE int64_t __AMOADD_D(volatile int64_t *addr, int64_t value) +{ + register int64_t result; + + __ASM volatile ("amoadd.d %0, %2, %1" : \ + "=r"(result), "+A"(*addr) : "r"(value) : "memory"); + return *addr; +} + +/** + * \brief Atomic And with 64bit value + * \details Atomically AND 64bit value with value in memory using amoand.d. + * \param [in] addr Address pointer to data, address need to be 8byte aligned + * \param [in] value value to be ANDed + * \return return memory value & and value + */ +__STATIC_FORCEINLINE int64_t __AMOAND_D(volatile int64_t *addr, int64_t value) +{ + register int64_t result; + + __ASM volatile ("amoand.d %0, %2, %1" : \ + "=r"(result), "+A"(*addr) : "r"(value) : "memory"); + return *addr; +} + +/** + * \brief Atomic OR with 64bit value + * \details Atomically OR 64bit value with value in memory using amoor.d. + * \param [in] addr Address pointer to data, address need to be 8byte aligned + * \param [in] value value to be ORed + * \return return memory value | and value + */ +__STATIC_FORCEINLINE int64_t __AMOOR_D(volatile int64_t *addr, int64_t value) +{ + register int64_t result; + + __ASM volatile ("amoor.d %0, %2, %1" : \ + "=r"(result), "+A"(*addr) : "r"(value) : "memory"); + return *addr; +} + +/** + * \brief Atomic XOR with 64bit value + * \details Atomically XOR 64bit value with value in memory using amoxor.d. + * \param [in] addr Address pointer to data, address need to be 8byte aligned + * \param [in] value value to be XORed + * \return return memory value ^ and value + */ +__STATIC_FORCEINLINE int64_t __AMOXOR_D(volatile int64_t *addr, int64_t value) +{ + register int64_t result; + + __ASM volatile ("amoxor.d %0, %2, %1" : \ + "=r"(result), "+A"(*addr) : "r"(value) : "memory"); + return *addr; +} + +/** + * \brief Atomic unsigned MAX with 64bit value + * \details Atomically unsigned max compare 64bit value with value in memory using amomaxu.d. + * \param [in] addr Address pointer to data, address need to be 8byte aligned + * \param [in] value value to be compared + * \return return the bigger value + */ +__STATIC_FORCEINLINE uint64_t __AMOMAXU_D(volatile uint64_t *addr, uint64_t value) +{ + register uint64_t result; + + __ASM volatile ("amomaxu.d %0, %2, %1" : \ + "=r"(result), "+A"(*addr) : "r"(value) : "memory"); + return *addr; +} + +/** + * \brief Atomic signed MAX with 64bit value + * \details Atomically signed max compare 64bit value with value in memory using amomax.d. + * \param [in] addr Address pointer to data, address need to be 8byte aligned + * \param [in] value value to be compared + * \return the bigger value + */ +__STATIC_FORCEINLINE int64_t __AMOMAX_D(volatile int64_t *addr, int64_t value) +{ + register int64_t result; + + __ASM volatile ("amomax.d %0, %2, %1" : \ + "=r"(result), "+A"(*addr) : "r"(value) : "memory"); + return *addr; +} + +/** + * \brief Atomic unsigned MIN with 64bit value + * \details Atomically unsigned min compare 64bit value with value in memory using amominu.d. + * \param [in] addr Address pointer to data, address need to be 8byte aligned + * \param [in] value value to be compared + * \return the smaller value + */ +__STATIC_FORCEINLINE uint64_t __AMOMINU_D(volatile uint64_t *addr, uint64_t value) +{ + register uint64_t result; + + __ASM volatile ("amominu.d %0, %2, %1" : \ + "=r"(result), "+A"(*addr) : "r"(value) : "memory"); + return *addr; +} + +/** + * \brief Atomic signed MIN with 64bit value + * \details Atomically signed min compare 64bit value with value in memory using amomin.d. + * \param [in] addr Address pointer to data, address need to be 8byte aligned + * \param [in] value value to be compared + * \return the smaller value + */ +__STATIC_FORCEINLINE int64_t __AMOMIN_D(volatile int64_t *addr, int64_t value) +{ + register int64_t result; + + __ASM volatile ("amomin.d %0, %2, %1" : \ + "=r"(result), "+A"(*addr) : "r"(value) : "memory"); + return *addr; +} +#endif /* __RISCV_XLEN == 64 */ + +/** @} */ /* End of Doxygen Group NMSIS_Core_CPU_Intrinsic */ + +#ifdef __cplusplus +} +#endif +#endif /* __CORE_FEATURE_BASE__ */ diff --git a/external/arch/riscv/nuclei/NMSIS/Core/Include/core_feature_cache.h b/external/arch/riscv/nuclei/NMSIS/Core/Include/core_feature_cache.h new file mode 100644 index 000000000..38b9eb972 --- /dev/null +++ b/external/arch/riscv/nuclei/NMSIS/Core/Include/core_feature_cache.h @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2019 Nuclei Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __CORE_FEATURE_CACHE_H__ +#define __CORE_FEATURE_CACHE_H__ +/*! + * @file core_feature_cache.h + * @brief Cache feature API header file for Nuclei N/NX Core + */ +/* + * Cache Feature Configuration Macro: + * 1. __ICACHE_PRESENT: Define whether I-Cache Unit is present or not. + * * 0: Not present + * * 1: Present + * 1. __DCACHE_PRESENT: Define whether D-Cache Unit is present or not. + * * 0: Not present + * * 1: Present + */ +#ifdef __cplusplus + extern "C" { +#endif + +#if defined(__ICACHE_PRESENT) && (__ICACHE_PRESENT == 1) + +/* ########################## Cache functions #################################### */ +/** + * \defgroup NMSIS_Core_Cache Cache Functions + * \brief Functions that configure Instruction and Data Cache. + * @{ + */ + +/** @} */ /* End of Doxygen Group NMSIS_Core_Cache */ + +/** + * \defgroup NMSIS_Core_ICache I-Cache Functions + * \ingroup NMSIS_Core_Cache + * \brief Functions that configure Instruction Cache. + * @{ + */ +/** + * \brief Enable ICache + * \details + * This function enable I-Cache + * \remarks + * - This \ref CSR_MCACHE_CTL register control I Cache enable. + * \sa + * - \ref DisableICache +*/ +__STATIC_FORCEINLINE void EnableICache (void) +{ + __RV_CSR_SET(CSR_MCACHE_CTL, CSR_MCACHE_CTL_IE); +} + +/** + * \brief Disable ICache + * \details + * This function Disable I-Cache + * \remarks + * - This \ref CSR_MCACHE_CTL register control I Cache enable. + * \sa + * - \ref EnableICache + */ +__STATIC_FORCEINLINE void DisableICache (void) +{ + __RV_CSR_CLEAR(CSR_MCACHE_CTL, CSR_MCACHE_CTL_IE); +} +/** @} */ /* End of Doxygen Group NMSIS_Core_ICache */ +#endif /* defined(__ICACHE_PRESENT) && (__ICACHE_PRESENT == 1) */ + +#if defined(__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1) +/** + * \defgroup NMSIS_Core_DCache D-Cache Functions + * \ingroup NMSIS_Core_Cache + * \brief Functions that configure Data Cache. + * @{ + */ +/** + * \brief Enable DCache + * \details + * This function enable D-Cache + * \remarks + * - This \ref CSR_MCACHE_CTL register control D Cache enable. + * \sa + * - \ref DisableDCache +*/ +__STATIC_FORCEINLINE void EnableDCache (void) +{ + __RV_CSR_SET(CSR_MCACHE_CTL, CSR_MCACHE_CTL_DE); +} + +/** + * \brief Disable DCache + * \details + * This function Disable D-Cache + * \remarks + * - This \ref CSR_MCACHE_CTL register control D Cache enable. + * \sa + * - \ref EnableDCache + */ +__STATIC_FORCEINLINE void DisableDCache (void) +{ + __RV_CSR_CLEAR(CSR_MCACHE_CTL, CSR_MCACHE_CTL_DE); +} +/** @} */ /* End of Doxygen Group NMSIS_Core_DCache */ +#endif /* defined(__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1) */ + +#ifdef __cplusplus +} +#endif +#endif /** __CORE_FEATURE_CACHE_H__ */ diff --git a/external/arch/riscv/nuclei/NMSIS/Core/Include/core_feature_dsp.h b/external/arch/riscv/nuclei/NMSIS/Core/Include/core_feature_dsp.h new file mode 100644 index 000000000..4d41e553e --- /dev/null +++ b/external/arch/riscv/nuclei/NMSIS/Core/Include/core_feature_dsp.h @@ -0,0 +1,18659 @@ +/* + * Copyright (c) 2019 Nuclei Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __CORE_FEATURE_DSP__ +#define __CORE_FEATURE_DSP__ + +/*! + * @file core_feature_dsp.h + * @brief DSP feature API header file for Nuclei N/NX Core + */ +/* + * DSP Feature Configuration Macro: + * 1. __DSP_PRESENT: Define whether Digital Signal Processing Unit(DSP) is present or not + * * 0: Not present + * * 1: Present + */ +#ifdef __cplusplus + extern "C" { +#endif + +#if defined(__DSP_PRESENT) && (__DSP_PRESENT == 1) + +/* ########################### CPU SIMD DSP Intrinsic Functions ########################### */ +/** + * \defgroup NMSIS_Core_DSP_Intrinsic Intrinsic Functions for SIMD Instructions + * \ingroup NMSIS_Core + * \brief Functions that generate RISC-V DSP SIMD instructions. + * \details + * + * The following functions generate specified RISC-V SIMD instructions that cannot be directly accessed by compiler. + * * **DSP ISA Extension Instruction Summary** + * + **Shorthand Definitions** + * - r.H == rH1: r[31:16], r.L == r.H0: r[15:0] + * - r.B3: r[31:24], r.B2: r[23:16], r.B1: r[15:8], r.B0: r[7:0] + * - r.B[x]: r[(x*8+7):(x*8+0)] + * - r.H[x]: r[(x*16+7):(x*16+0)] + * - r.W[x]: r[(x*32+31):(x*32+0)] + * - r[xU]: the upper 32-bit of a 64-bit number; xU represents the GPR number that contains this upper part 32-bit value. + * - r[xL]: the lower 32-bit of a 64-bit number; xL represents the GPR number that contains this lower part 32-bit value. + * - r[xU].r[xL]: a 64-bit number that is formed from a pair of GPRs. + * - s>>: signed arithmetic right shift: + * - u>>: unsigned logical right shift + * - SAT.Qn(): Saturate to the range of [-2^n, 2^n-1], if saturation happens, set PSW.OV. + * - SAT.Um(): Saturate to the range of [0, 2^m-1], if saturation happens, set PSW.OV. + * - RUND(): Indicate `rounding`, i.e., add 1 to the most significant discarded bit for right shift or MSW-type multiplication instructions. + * - Sign or Zero Extending functions: + * - SEm(data): Sign-Extend data to m-bit.: + * - ZEm(data): Zero-Extend data to m-bit. + * - ABS(x): Calculate the absolute value of `x`. + * - CONCAT(x,y): Concatinate `x` and `y` to form a value. + * - u<: Unsinged less than comparison. + * - u<=: Unsinged less than & equal comparison. + * - u>: Unsinged greater than comparison. + * - s*: Signed multiplication. + * - u*: Unsigned multiplication. + * + * @{ + */ +/** @} */ /* End of Doxygen Group NMSIS_Core_DSP_Intrinsic */ + + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS SIMD Data Processing Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic + * \brief SIMD Data Processing Instructions + * \details + */ + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB SIMD 16-bit Add/Subtract Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS + * \brief SIMD 16-bit Add/Subtract Instructions + * \details + * Based on the combination of the types of the two 16-bit arithmetic operations, the SIMD 16-bit + * add/subtract instructions can be classified into 6 main categories: Addition (two 16-bit addition), + * Subtraction (two 16-bit subtraction), Crossed Add & Sub (one addition and one subtraction), and + * Crossed Sub & Add (one subtraction and one addition), Straight Add & Sub (one addition and one + * subtraction), and Straight Sub & Add (one subtraction and one addition). + * Based on the way of how an overflow condition is handled, the SIMD 16-bit add/subtract + * instructions can be classified into 5 groups: Wrap-around (dropping overflow), Signed Halving + * (keeping overflow by dropping 1 LSB bit), Unsigned Halving, Signed Saturation (clipping overflow), + * and Unsigned Saturation. + * Together, there are 30 SIMD 16-bit add/subtract instructions. + */ + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB SIMD 8-bit Addition & Subtraction Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS + * \brief SIMD 8-bit Addition & Subtraction Instructions + * \details + * Based on the types of the four 8-bit arithmetic operations, the SIMD 8-bit add/subtract instructions + * can be classified into 2 main categories: Addition (four 8-bit addition), and Subtraction (four 8-bit + * subtraction). + * Based on the way of how an overflow condition is handled for singed or unsigned operation, the + * SIMD 8-bit add/subtract instructions can be classified into 5 groups: Wrap-around (dropping + * overflow), Signed Halving (keeping overflow by dropping 1 LSB bit), Unsigned Halving, Signed + * Saturation (clipping overflow), and Unsigned Saturation. + * Together, there are 10 SIMD 8-bit add/subtract instructions. + */ + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT SIMD 16-bit Shift Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS + * \brief SIMD 16-bit Shift Instructions + * \details + * there are 14 SIMD 16-bit shift instructions. + */ + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT SIMD 8-bit Shift Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS + * \brief SIMD 8-bit Shift Instructions + * \details + * there are 14 SIMD 8-bit shift instructions. + */ + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP SIMD 16-bit Compare Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS + * \brief SIMD 16-bit Compare Instructions + * \details + * there are 5 SIMD 16-bit Compare instructions. + */ + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP SIMD 8-bit Compare Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS + * \brief SIMD 8-bit Compare Instructions + * \details + * there are 5 SIMD 8-bit Compare instructions. + */ + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY SIMD 16-bit Multiply Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS + * \brief SIMD 16-bit Multiply Instructions + * \details + * there are 6 SIMD 16-bit Multiply instructions. + */ + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY SIMD 8-bit Multiply Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS + * \brief SIMD 8-bit Multiply Instructions + * \details + * there are 6 SIMD 8-bit Multiply instructions. + */ + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC SIMD 16-bit Miscellaneous Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS + * \brief SIMD 16-bit Miscellaneous Instructions + * \details + * there are 10 SIMD 16-bit Misc instructions. + */ + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC SIMD 8-bit Miscellaneous Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS + * \brief SIMD 8-bit Miscellaneous Instructions + * \details + * there are 10 SIMD 8-bit Miscellaneous instructions. + */ + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK SIMD 8-bit Unpacking Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS + * \brief SIMD 8-bit Unpacking Instructions + * \details + * there are 8 SIMD 8-bit Unpacking instructions. + */ + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_NON_SIMD Non-SIMD Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic + * \brief Non-SIMD Instructions + * \details + */ + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU Non-SIMD Q15 saturation ALU Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD + * \brief Non-SIMD Q15 saturation ALU Instructions + * \details + * there are 7 Non-SIMD Q15 saturation ALU Instructions + */ + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU Non-SIMD Q31 saturation ALU Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD + * \brief Non-SIMD Q31 saturation ALU Instructions + * \details + * there are Non-SIMD Q31 saturation ALU Instructions + */ + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION 32-bit Computation Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD + * \brief 32-bit Computation Instructions + * \details + * there are 8 32-bit Computation Instructions + */ + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_OV_FLAG_SC OV (Overflow) flag Set/Clear Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD + * \brief OV (Overflow) flag Set/Clear Instructions + * \details + * The following table lists the user instructions related to Overflow (OV) flag manipulation. there are 2 OV (Overflow) flag Set/Clear Instructions + */ + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC Non-SIMD Miscellaneous Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD + * \brief Non-SIMD Miscellaneous Instructions + * \details + * There are 13 Miscellaneous Instructions here. + */ + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS Partial-SIMD Data Processing Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic + * \brief Partial-SIMD Data Processing Instructions + * \details + */ + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK SIMD 16-bit Packing Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS + * \brief SIMD 16-bit Packing Instructions + * \details + * there are 4 SIMD16-bit Packing Instructions. + */ + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC Signed MSW 32x32 Multiply and Add Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS + * \brief Signed MSW 32x32 Multiply and Add Instructions + * \details + * there are 8 Signed MSW 32x32 Multiply and Add Instructions + */ + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC Signed MSW 32x16 Multiply and Add Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS + * \brief Signed MSW 32x16 Multiply and Add Instructions + * \details + * there are 15 Signed MSW 32x16 Multiply and Add Instructions + */ + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB Signed 16-bit Multiply 32-bit Add/Subtract Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS + * \brief Signed 16-bit Multiply 32-bit Add/Subtract Instructions + * \details + * there are 18 Signed 16-bit Multiply 32-bit Add/Subtract Instructions + */ + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB Signed 16-bit Multiply 64-bit Add/Subtract Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS + * \brief Signed 16-bit Multiply 64-bit Add/Subtract Instructions + * \details + * there is Signed 16-bit Multiply 64-bit Add/Subtract Instructions + */ + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC Partial-SIMD Miscellaneous Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS + * \brief Partial-SIMD Miscellaneous Instructions + * \details + * there are 7 Partial-SIMD Miscellaneous Instructions + */ + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_8B_MULT_32B_ADD 8-bit Multiply with 32-bit Add Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS + * \brief 8-bit Multiply with 32-bit Add Instructions + * \details + * there are 3 8-bit Multiply with 32-bit Add Instructions + */ + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_64B_PROFILE 64-bit Profile Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic + * \brief 64-bit Profile Instructions + * \details + */ + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB 64-bit Addition & Subtraction Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic_64B_PROFILE + * \brief 64-bit Addition & Subtraction Instructions + * \details + * there are 10 64-bit Addition & Subtraction Instructions. + */ + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB 32-bit Multiply with 64-bit Add/Subtract Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic_64B_PROFILE + * \brief 32-bit Multiply with 64-bit Add/Subtract Instructions + * \details + * there are 32-bit Multiply 64-bit Add/Subtract Instructions + */ + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB Signed 16-bit Multiply with 64-bit Add/Subtract Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic_64B_PROFILE + * \brief Signed 16-bit Multiply with 64-bit Add/Subtract Instructions + * \details + * there are 10 Signed 16-bit Multiply with 64-bit Add/Subtract Instructions + */ + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY RV64 Only Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic + * \brief RV64 Only Instructions + * \details + */ + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB (RV64 Only) SIMD 32-bit Add/Subtract Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY + * \brief (RV64 Only) SIMD 32-bit Add/Subtract Instructions + * \details + * The following tables list instructions that are only present in RV64. + * There are 30 SIMD 32-bit addition or subtraction instructions.there are 4 SIMD16-bit Packing Instructions. + */ + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT (RV64 Only) SIMD 32-bit Shift Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY + * \brief (RV64 Only) SIMD 32-bit Shift Instructions + * \details + * there are 14 (RV64 Only) SIMD 32-bit Shift Instructions + */ + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC (RV64 Only) SIMD 32-bit Miscellaneous Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY + * \brief (RV64 Only) SIMD 32-bit Miscellaneous Instructions + * \details + * there are 5 (RV64 Only) SIMD 32-bit Miscellaneous Instructions + */ + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT (RV64 Only) SIMD Q15 Saturating Multiply Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY + * \brief (RV64 Only) SIMD Q15 Saturating Multiply Instructions + * \details + * there are 9 (RV64 Only) SIMD Q15 saturating Multiply Instructions + */ + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT (RV64 Only) 32-bit Multiply Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY + * \brief (RV64 Only) 32-bit Multiply Instructions + * \details + * there is 3 RV64 Only) 32-bit Multiply Instructions + */ + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT_ADD (RV64 Only) 32-bit Multiply & Add Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY + * \brief (RV64 Only) 32-bit Multiply & Add Instructions + * \details + * there are 3 (RV64 Only) 32-bit Multiply & Add Instructions + */ + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC (RV64 Only) 32-bit Parallel Multiply & Add Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY + * \brief (RV64 Only) 32-bit Parallel Multiply & Add Instructions + * \details + * there are 12 (RV64 Only) 32-bit Parallel Multiply & Add Instructions + */ + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_NON_SIMD_32B_SHIFT (RV64 Only) Non-SIMD 32-bit Shift Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY + * \brief (RV64 Only) Non-SIMD 32-bit Shift Instructions + * \details + * there are 1 (RV64 Only) Non-SIMD 32-bit Shift Instructions + */ + +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK 32-bit Packing Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY + * \brief 32-bit Packing Instructions + * \details + * There are four 32-bit packing instructions here + */ + +/* ===== Inline Function Start for 3.1. ADD8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB + * \brief ADD8 (SIMD 8-bit Addition) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * ADD8 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 8-bit integer element additions simultaneously. + * + * **Description**:\n + * This instruction adds the 8-bit integer elements in Rs1 with the 8-bit integer elements + * in Rs2, and then writes the 8-bit element results to Rd. + * + * **Note**:\n + * This instruction can be used for either signed or unsigned addition. + * + * **Operations**:\n + * ~~~ + * Rd.B[x] = Rs1.B[x] + Rs2.B[x]; + * for RV32: x=3...0, + * for RV64: x=7...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_ADD8(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("add8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.1. ADD8 ===== */ + +/* ===== Inline Function Start for 3.2. ADD16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB + * \brief ADD16 (SIMD 16-bit Addition) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * ADD16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 16-bit integer element additions simultaneously. + * + * **Description**:\n + * This instruction adds the 16-bit integer elements in Rs1 with the 16-bit integer + * elements in Rs2, and then writes the 16-bit element results to Rd. + * + * **Note**:\n + * This instruction can be used for either signed or unsigned addition. + * + * **Operations**:\n + * ~~~ + * Rd.H[x] = Rs1.H[x] + Rs2.H[x]; + * for RV32: x=1...0, + * for RV64: x=3...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_ADD16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("add16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.2. ADD16 ===== */ + +/* ===== Inline Function Start for 3.3. ADD64 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB + * \brief ADD64 (64-bit Addition) + * \details + * **Type**: 64-bit Profile + * + * **Syntax**:\n + * ~~~ + * ADD64 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Add two 64-bit signed or unsigned integers. + * + * **RV32 Description**:\n + * This instruction adds the 64-bit integer of an even/odd pair of registers specified + * by Rs1(4,1) with the 64-bit integer of an even/odd pair of registers specified by Rs2(4,1), and then + * writes the 64-bit result to an even/odd pair of registers specified by Rd(4,1). + * Rx(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the register + * pair includes register 2d and 2d+1. + * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register + * of the pair contains the low 32-bit of the result. + * + * **RV64 Description**:\n + * This instruction has the same behavior as the ADD instruction in RV64I. + * + * **Note**:\n + * This instruction can be used for either signed or unsigned addition. + * + * **Operations**:\n + * ~~~ + * RV32: + * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1); + * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1); + * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1); + * R[t_H].R[t_L] = R[a_H].R[a_L] + R[b_H].R[b_L]; + * RV64: + * Rd = Rs1 + Rs2; + * ~~~ + * + * \param [in] a unsigned long long type of value stored in a + * \param [in] b unsigned long long type of value stored in b + * \return value stored in unsigned long long type + */ +__STATIC_FORCEINLINE unsigned long long __RV_ADD64(unsigned long long a, unsigned long long b) +{ + register unsigned long long result; + __ASM volatile("add64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.3. ADD64 ===== */ + +/* ===== Inline Function Start for 3.4. AVE ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC + * \brief AVE (Average with Rounding) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * AVE Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Calculate the average of the contents of two general registers. + * + * **Description**:\n + * This instruction calculates the average value of two signed integers stored in Rs1 and + * Rs2, rounds up a half-integer result to the nearest integer, and writes the result to Rd. + * + * **Operations**:\n + * ~~~ + * Sum = CONCAT(Rs1[MSB],Rs1[MSB:0]) + CONCAT(Rs2[MSB],Rs2[MSB:0]) + 1; + * Rd = Sum[(MSB+1):1]; + * for RV32: MSB=31, + * for RV64: MSB=63 + * ~~~ + * + * \param [in] a long type of value stored in a + * \param [in] b long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_AVE(long a, long b) +{ + register long result; + __ASM volatile("ave %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.4. AVE ===== */ + +/* ===== Inline Function Start for 3.5. BITREV ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC + * \brief BITREV (Bit Reverse) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * BITREV Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Reverse the bit positions of the source operand within a specified width starting from bit + * 0. The reversed width is a variable from a GPR. + * + * **Description**:\n + * This instruction reverses the bit positions of the content of Rs1. The reversed bit width + * is calculated as Rs2[4:0]+1 (RV32) or Rs2[5:0]+1 (RV64). The upper bits beyond the reversed width + * are filled with zeros. After the bit reverse operation, the result is written to Rd. + * + * **Operations**:\n + * ~~~ + * msb = Rs2[4:0]; (for RV32) + * msb = Rs2[5:0]; (for RV64) + * rev[0:msb] = Rs1[msb:0]; + * Rd = ZE(rev[msb:0]); + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_BITREV(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("bitrev %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.5. BITREV ===== */ + +/* ===== Inline Function Start for 3.6. BITREVI ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC + * \brief BITREVI (Bit Reverse Immediate) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * (RV32) BITREVI Rd, Rs1, imm[4:0] + * (RV64) BITREVI Rd, Rs1, imm[5:0] + * ~~~ + * + * **Purpose**:\n + * Reverse the bit positions of the source operand within a specified width starting from bit + * 0. The reversed width is an immediate value. + * + * **Description**:\n + * This instruction reverses the bit positions of the content of Rs1. The reversed bit width + * is calculated as imm[4:0]+1 (RV32) or imm[5:0]+1 (RV64). The upper bits beyond the reversed width + * are filled with zeros. After the bit reverse operation, the result is written to Rd. + * + * **Operations**:\n + * ~~~ + * msb = imm[4:0]; (RV32) + * msb = imm[5:0]; (RV64) + * rev[0:msb] = Rs1[msb:0]; + * Rd = ZE(rev[msb:0]); + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +#define __RV_BITREVI(a, b) \ + ({ \ + register unsigned long result; \ + register unsigned long __a = (unsigned long)(a); \ + __ASM volatile("bitrevi %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ + result; \ + }) +/* ===== Inline Function End for 3.6. BITREVI ===== */ + +/* ===== Inline Function Start for 3.7. BPICK ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC + * \brief BPICK (Bit-wise Pick) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * BPICK Rd, Rs1, Rs2, Rc + * ~~~ + * + * **Purpose**:\n + * Select from two source operands based on a bit mask in the third operand. + * + * **Description**:\n + * This instruction selects individual bits from Rs1 or Rs2, based on the bit mask value in + * Rc. If a bit in Rc is 1, the corresponding bit is from Rs1; otherwise, the corresponding bit is from Rs2. + * The selection results are written to Rd. + * + * **Operations**:\n + * ~~~ + * Rd[x] = Rc[x]? Rs1[x] : Rs2[x]; + * for RV32, x=31...0 + * for RV64, x=63...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \param [in] c unsigned long type of value stored in c + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_BPICK(unsigned long a, unsigned long b, unsigned long c) +{ + register unsigned long result; + __ASM volatile("bpick %0, %1, %2, %3" : "=r"(result) : "r"(a), "r"(b), "r"(c)); + return result; +} +/* ===== Inline Function End for 3.7. BPICK ===== */ + +/* ===== Inline Function Start for 3.8. CLROV ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_OV_FLAG_SC + * \brief CLROV (Clear OV flag) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * CLROV # pseudo mnemonic + * ~~~ + * + * **Purpose**:\n + * This pseudo instruction is an alias to `CSRRCI x0, ucode, 1` instruction. + * + * + */ +__STATIC_FORCEINLINE void __RV_CLROV(void) +{ + __ASM volatile("clrov "); +} +/* ===== Inline Function End for 3.8. CLROV ===== */ + +/* ===== Inline Function Start for 3.9. CLRS8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC + * \brief CLRS8 (SIMD 8-bit Count Leading Redundant Sign) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * CLRS8 Rd, Rs1 + * ~~~ + * + * **Purpose**:\n + * Count the number of redundant sign bits of the 8-bit elements of a general register. + * + * **Description**:\n + * Starting from the bits next to the sign bits of the 8-bit elements of Rs1, this instruction + * counts the number of redundant sign bits and writes the result to the corresponding 8-bit elements + * of Rd. + * + * **Operations**:\n + * ~~~ + * snum[x] = Rs1.B[x]; + * cnt[x] = 0; + * for (i = 6 to 0) { + * if (snum[x](i) == snum[x](7)) { + * cnt[x] = cnt[x] + 1; + * } else { + * break; + * } + * } + * Rd.B[x] = cnt[x]; + * for RV32: x=3...0 + * for RV64: x=7...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_CLRS8(unsigned long a) +{ + register unsigned long result; + __ASM volatile("clrs8 %0, %1" : "=r"(result) : "r"(a)); + return result; +} +/* ===== Inline Function End for 3.9. CLRS8 ===== */ + +/* ===== Inline Function Start for 3.10. CLRS16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC + * \brief CLRS16 (SIMD 16-bit Count Leading Redundant Sign) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * CLRS16 Rd, Rs1 + * ~~~ + * + * **Purpose**:\n + * Count the number of redundant sign bits of the 16-bit elements of a general register. + * + * **Description**:\n + * Starting from the bits next to the sign bits of the 16-bit elements of Rs1, this + * instruction counts the number of redundant sign bits and writes the result to the corresponding 16- + * bit elements of Rd. + * + * **Operations**:\n + * ~~~ + * snum[x] = Rs1.H[x]; + * cnt[x] = 0; + * for (i = 14 to 0) { + * if (snum[x](i) == snum[x](15)) { + * cnt[x] = cnt[x] + 1; + * } else { + * break; + * } + * } + * Rd.H[x] = cnt[x]; + * for RV32: x=1...0 + * for RV64: x=3...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_CLRS16(unsigned long a) +{ + register unsigned long result; + __ASM volatile("clrs16 %0, %1" : "=r"(result) : "r"(a)); + return result; +} +/* ===== Inline Function End for 3.10. CLRS16 ===== */ + +/* ===== Inline Function Start for 3.11. CLRS32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC + * \brief CLRS32 (SIMD 32-bit Count Leading Redundant Sign) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * CLRS32 Rd, Rs1 + * ~~~ + * + * **Purpose**:\n + * Count the number of redundant sign bits of the 32-bit elements of a general register. + * + * **Description**:\n + * Starting from the bits next to the sign bits of the 32-bit elements of Rs1, this + * instruction counts the number of redundant sign bits and writes the result to the corresponding 32- + * bit elements of Rd. + * + * **Operations**:\n + * ~~~ + * snum[x] = Rs1.W[x]; + * cnt[x] = 0; + * for (i = 30 to 0) { + * if (snum[x](i) == snum[x](31)) { + * cnt[x] = cnt[x] + 1; + * } else { + * break; + * } + * } + * Rd.W[x] = cnt[x]; + * for RV32: x=0 + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_CLRS32(unsigned long a) +{ + register unsigned long result; + __ASM volatile("clrs32 %0, %1" : "=r"(result) : "r"(a)); + return result; +} +/* ===== Inline Function End for 3.11. CLRS32 ===== */ + +/* ===== Inline Function Start for 3.12. CLO8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC + * \brief CLO8 (SIMD 8-bit Count Leading One) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * CLO8 Rd, Rs1 + * ~~~ + * + * **Purpose**:\n + * Count the number of leading one bits of the 8-bit elements of a general register. + * + * **Description**:\n + * Starting from the most significant bits of the 8-bit elements of Rs1, this instruction + * counts the number of leading one bits and writes the results to the corresponding 8-bit elements of + * Rd. + * + * **Operations**:\n + * ~~~ + * snum[x] = Rs1.B[x]; + * cnt[x] = 0; + * for (i = 7 to 0) { + * if (snum[x](i) == 1) { + * cnt[x] = cnt[x] + 1; + * } else { + * break; + * } + * } + * Rd.B[x] = cnt[x]; + * for RV32: x=3...0 + * for RV64: x=7...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_CLO8(unsigned long a) +{ + register unsigned long result; + __ASM volatile("clo8 %0, %1" : "=r"(result) : "r"(a)); + return result; +} +/* ===== Inline Function End for 3.12. CLO8 ===== */ + +/* ===== Inline Function Start for 3.13. CLO16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC + * \brief CLO16 (SIMD 16-bit Count Leading One) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * CLO16 Rd, Rs1 + * ~~~ + * + * **Purpose**:\n + * Count the number of leading one bits of the 16-bit elements of a general register. + * + * **Description**:\n + * Starting from the most significant bits of the 16-bit elements of Rs1, this instruction + * counts the number of leading one bits and writes the results to the corresponding 16-bit elements + * of Rd. + * + * **Operations**:\n + * ~~~ + * snum[x] = Rs1.H[x]; + * cnt[x] = 0; + * for (i = 15 to 0) { + * if (snum[x](i) == 1) { + * cnt[x] = cnt[x] + 1; + * } else { + * break; + * } + * } + * Rd.H[x] = cnt[x]; + * for RV32: x=1...0 + * for RV64: x=3...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_CLO16(unsigned long a) +{ + register unsigned long result; + __ASM volatile("clo16 %0, %1" : "=r"(result) : "r"(a)); + return result; +} +/* ===== Inline Function End for 3.13. CLO16 ===== */ + +/* ===== Inline Function Start for 3.14. CLO32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC + * \brief CLO32 (SIMD 32-bit Count Leading One) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * CLO32 Rd, Rs1 + * ~~~ + * + * **Purpose**:\n + * Count the number of leading one bits of the 32-bit elements of a general register. + * + * **Description**:\n + * Starting from the most significant bits of the 32-bit elements of Rs1, this instruction + * counts the number of leading one bits and writes the results to the corresponding 32-bit elements + * of Rd. + * + * **Operations**:\n + * ~~~ + * snum[x] = Rs1.W[x]; + * cnt[x] = 0; + * for (i = 31 to 0) { + * if (snum[x](i) == 1) { + * cnt[x] = cnt[x] + 1; + * } else { + * break; + * } + * } + * Rd.W[x] = cnt[x]; + * for RV32: x=0 + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_CLO32(unsigned long a) +{ + register unsigned long result; + __ASM volatile("clo32 %0, %1" : "=r"(result) : "r"(a)); + return result; +} +/* ===== Inline Function End for 3.14. CLO32 ===== */ + +/* ===== Inline Function Start for 3.15. CLZ8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC + * \brief CLZ8 (SIMD 8-bit Count Leading Zero) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * CLZ8 Rd, Rs1 + * ~~~ + * + * **Purpose**:\n + * Count the number of leading zero bits of the 8-bit elements of a general register. + * + * **Description**:\n + * Starting from the most significant bits of the 8-bit elements of Rs1, this instruction + * counts the number of leading zero bits and writes the results to the corresponding 8-bit elements of + * Rd. + * + * **Operations**:\n + * ~~~ + * snum[x] = Rs1.B[x]; + * cnt[x] = 0; + * for (i = 7 to 0) { + * if (snum[x](i) == 0) { + * cnt[x] = cnt[x] + 1; + * } else { + * break; + * } + * } + * Rd.B[x] = cnt[x]; + * for RV32: x=3...0 + * for RV64: x=7...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_CLZ8(unsigned long a) +{ + register unsigned long result; + __ASM volatile("clz8 %0, %1" : "=r"(result) : "r"(a)); + return result; +} +/* ===== Inline Function End for 3.15. CLZ8 ===== */ + +/* ===== Inline Function Start for 3.16. CLZ16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC + * \brief CLZ16 (SIMD 16-bit Count Leading Zero) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * CLZ16 Rd, Rs1 + * ~~~ + * + * **Purpose**:\n + * Count the number of leading zero bits of the 16-bit elements of a general register. + * + * **Description**:\n + * Starting from the most significant bits of the 16-bit elements of Rs1, this instruction + * counts the number of leading zero bits and writes the results to the corresponding 16-bit elements + * of Rd. + * + * **Operations**:\n + * ~~~ + * snum[x] = Rs1.H[x]; + * cnt[x] = 0; + * for (i = 15 to 0) { + * if (snum[x](i) == 0) { + * cnt[x] = cnt[x] + 1; + * } else { + * break; + * } + * } + * Rd.H[x] = cnt[x]; + * for RV32: x=1...0 + * for RV64: x=3...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_CLZ16(unsigned long a) +{ + register unsigned long result; + __ASM volatile("clz16 %0, %1" : "=r"(result) : "r"(a)); + return result; +} +/* ===== Inline Function End for 3.16. CLZ16 ===== */ + +/* ===== Inline Function Start for 3.17. CLZ32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC + * \brief CLZ32 (SIMD 32-bit Count Leading Zero) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * CLZ32 Rd, Rs1 + * ~~~ + * + * **Purpose**:\n + * Count the number of leading zero bits of the 32-bit elements of a general register. + * + * **Description**:\n + * Starting from the most significant bits of the 32-bit elements of Rs1, this instruction + * counts the number of leading zero bits and writes the results to the corresponding 32-bit elements + * of Rd. + * + * **Operations**:\n + * ~~~ + * snum[x] = Rs1.W[x]; + * cnt[x] = 0; + * for (i = 31 to 0) { + * if (snum[x](i) == 0) { + * cnt[x] = cnt[x] + 1; + * } else { + * break; + * } + * } + * Rd.W[x] = cnt[x]; + * for RV32: x=0 + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_CLZ32(unsigned long a) +{ + register unsigned long result; + __ASM volatile("clz32 %0, %1" : "=r"(result) : "r"(a)); + return result; +} +/* ===== Inline Function End for 3.17. CLZ32 ===== */ + +/* ===== Inline Function Start for 3.18. CMPEQ8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP + * \brief CMPEQ8 (SIMD 8-bit Integer Compare Equal) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * CMPEQ8 Rs, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 8-bit integer elements equal comparisons simultaneously. + * + * **Description**:\n + * This instruction compares the 8-bit integer elements in Rs1 with the 8-bit integer + * elements in Rs2 to see if they are equal. If they are equal, the result is 0xFF; otherwise, the result is + * 0x0. The 8-bit element comparison results are written to Rd. + * + * **Note**:\n + * This instruction can be used for either signed or unsigned numbers. + * + * **Operations**:\n + * ~~~ + * Rd.B[x] = (Rs1.B[x] == Rs2.B[x])? 0xff : 0x0; + * for RV32: x=3...0, + * for RV64: x=7...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_CMPEQ8(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("cmpeq8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.18. CMPEQ8 ===== */ + +/* ===== Inline Function Start for 3.19. CMPEQ16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP + * \brief CMPEQ16 (SIMD 16-bit Integer Compare Equal) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * CMPEQ16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 16-bit integer elements equal comparisons simultaneously. + * + * **Description**:\n + * This instruction compares the 16-bit integer elements in Rs1 with the 16-bit integer + * elements in Rs2 to see if they are equal. If they are equal, the result is 0xFFFF; otherwise, the result + * is 0x0. The 16-bit element comparison results are written to Rt. + * + * **Note**:\n + * This instruction can be used for either signed or unsigned numbers. + * + * **Operations**:\n + * ~~~ + * Rd.H[x] = (Rs1.H[x] == Rs2.H[x])? 0xffff : 0x0; + * for RV32: x=1...0, + * for RV64: x=3...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_CMPEQ16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("cmpeq16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.19. CMPEQ16 ===== */ + +/* ===== Inline Function Start for 3.20. CRAS16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB + * \brief CRAS16 (SIMD 16-bit Cross Addition & Subtraction) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * CRAS16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 16-bit integer element addition and 16-bit integer element subtraction in a 32-bit + * chunk simultaneously. Operands are from crossed positions in 32-bit chunks. + * + * **Description**:\n + * This instruction adds the 16-bit integer element in [31:16] of 32-bit chunks in Rs1 with + * the 16-bit integer element in [15:0] of 32-bit chunks in Rs2, and writes the result to [31:16] of 32-bit + * chunks in Rd; at the same time, it subtracts the 16-bit integer element in [31:16] of 32-bit chunks in + * Rs2 from the 16-bit integer element in [15:0] of 32-bit chunks, and writes the result to [15:0] of 32- + * bit chunks in Rd. + * + * **Note**:\n + * This instruction can be used for either signed or unsigned operations. + * + * **Operations**:\n + * ~~~ + * Rd.W[x][31:16] = Rs1.W[x][31:16] + Rs2.W[x][15:0]; + * Rd.W[x][15:0] = Rs1.W[x][15:0] - Rs2.W[x][31:16]; + * for RV32, x=0 + * for RV64, x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_CRAS16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("cras16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.20. CRAS16 ===== */ + +/* ===== Inline Function Start for 3.21. CRSA16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB + * \brief CRSA16 (SIMD 16-bit Cross Subtraction & Addition) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * CRSA16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 16-bit integer element subtraction and 16-bit integer element addition in a 32-bit + * chunk simultaneously. Operands are from crossed positions in 32-bit chunks. + * + * **Description**:\n + * This instruction subtracts the 16-bit integer element in [15:0] of 32-bit chunks in Rs2 + * from the 16-bit integer element in [31:16] of 32-bit chunks in Rs1, and writes the result to [31:16] of + * 32-bit chunks in Rd; at the same time, it adds the 16-bit integer element in [31:16] of 32-bit chunks + * in Rs2 with the 16-bit integer element in [15:0] of 32-bit chunks in Rs1, and writes the result to + * [15:0] of 32-bit chunks in Rd. + * + * **Note**:\n + * This instruction can be used for either signed or unsigned operations. + * + * **Operations**:\n + * ~~~ + * Rd.W[x][31:16] = Rs1.W[x][31:16] - Rs2.W[x][15:0]; + * Rd.W[x][15:0] = Rs1.W[x][15:0] + Rs2.W[x][31:16]; + * for RV32, x=0 + * for RV64, x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_CRSA16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("crsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.21. CRSA16 ===== */ + +/* ===== Inline Function Start for 3.22. INSB ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC + * \brief INSB (Insert Byte) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * (RV32) INSB Rd, Rs1, imm[1:0] + * (RV64) INSB Rd, Rs1, imm[2:0] + * ~~~ + * + * **Purpose**:\n + * Insert byte 0 of a 32-bit or 64-bit register into one of the byte elements of another register. + * + * **Description**:\n + * This instruction inserts byte 0 of Rs1 into byte `imm[1:0]` (RV32) or `imm[2:0]` (RV64) + * of Rd. + * + * **Operations**:\n + * ~~~ + * bpos = imm[1:0]; (RV32) + * bpos = imm[2:0]; (RV64) + * Rd.B[bpos] = Rs1.B[0] + * ~~~ + * + * \param [in] t unsigned long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +#define __RV_INSB(t, a, b) \ + ({ \ + register unsigned long __t = (unsigned long)(t); \ + register unsigned long __a = (unsigned long)(a); \ + __ASM volatile("insb %0, %1, %2" : "+r"(__t) : "r"(__a), "K"(b)); \ + __t; \ + }) +/* ===== Inline Function End for 3.22. INSB ===== */ + +/* ===== Inline Function Start for 3.23. KABS8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC + * \brief KABS8 (SIMD 8-bit Saturating Absolute) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KABS8 Rd, Rs1 + * ~~~ + * + * **Purpose**:\n + * Get the absolute value of 8-bit signed integer elements simultaneously. + * + * **Description**:\n + * This instruction calculates the absolute value of 8-bit signed integer elements stored + * in Rs1 and writes the element results to Rd. If the input number is 0x80, this instruction generates + * 0x7f as the output and sets the OV bit to 1. + * + * **Operations**:\n + * ~~~ + * src = Rs1.B[x]; + * if (src == 0x80) { + * src = 0x7f; + * OV = 1; + * } else if (src[7] == 1) + * src = -src; + * } + * Rd.B[x] = src; + * for RV32: x=3...0, + * for RV64: x=7...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KABS8(unsigned long a) +{ + register unsigned long result; + __ASM volatile("kabs8 %0, %1" : "=r"(result) : "r"(a)); + return result; +} +/* ===== Inline Function End for 3.23. KABS8 ===== */ + +/* ===== Inline Function Start for 3.24. KABS16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC + * \brief KABS16 (SIMD 16-bit Saturating Absolute) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KABS16 Rd, Rs1 + * ~~~ + * + * **Purpose**:\n + * Get the absolute value of 16-bit signed integer elements simultaneously. + * + * **Description**:\n + * This instruction calculates the absolute value of 16-bit signed integer elements stored + * in Rs1 and writes the element results to Rd. If the input number is 0x8000, this instruction + * generates 0x7fff as the output and sets the OV bit to 1. + * + * **Operations**:\n + * ~~~ + * src = Rs1.H[x]; + * if (src == 0x8000) { + * src = 0x7fff; + * OV = 1; + * } else if (src[15] == 1) + * src = -src; + * } + * Rd.H[x] = src; + * for RV32: x=1...0, + * for RV64: x=3...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KABS16(unsigned long a) +{ + register unsigned long result; + __ASM volatile("kabs16 %0, %1" : "=r"(result) : "r"(a)); + return result; +} +/* ===== Inline Function End for 3.24. KABS16 ===== */ + +/* ===== Inline Function Start for 3.25. KABSW ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU + * \brief KABSW (Scalar 32-bit Absolute Value with Saturation) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * KABSW Rd, Rs1 + * ~~~ + * + * **Purpose**:\n + * Get the absolute value of a signed 32-bit integer in a general register. + * + * **Description**:\n + * This instruction calculates the absolute value of a signed 32-bit integer stored in Rs1. + * The result is sign-extended (for RV64) and written to Rd. This instruction with the minimum + * negative integer input of 0x80000000 will produce a saturated output of maximum positive integer + * of 0x7fffffff and the OV flag will be set to 1. + * + * **Operations**:\n + * ~~~ + * if (Rs1.W[0] >= 0) { + * res = Rs1.W[0]; + * } else { + * If (Rs1.W[0] == 0x80000000) { + * res = 0x7fffffff; + * OV = 1; + * } else { + * res = -Rs1.W[0]; + * } + * } + * Rd = SE32(res); + * ~~~ + * + * \param [in] a signed long type of value stored in a + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KABSW(signed long a) +{ + register unsigned long result; + __ASM volatile("kabsw %0, %1" : "=r"(result) : "r"(a)); + return result; +} +/* ===== Inline Function End for 3.25. KABSW ===== */ + +/* ===== Inline Function Start for 3.26. KADD8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB + * \brief KADD8 (SIMD 8-bit Signed Saturating Addition) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KADD8 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 8-bit signed integer element saturating additions simultaneously. + * + * **Description**:\n + * This instruction adds the 8-bit signed integer elements in Rs1 with the 8-bit signed + * integer elements in Rs2. If any of the results are beyond the Q7 number range (-2^7 <= Q7 <= 2^7-1), they + * are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd. + * + * **Operations**:\n + * ~~~ + * res[x] = Rs1.B[x] + Rs2.B[x]; + * if (res[x] > 127) { + * res[x] = 127; + * OV = 1; + * } else if (res[x] < -128) { + * res[x] = -128; + * OV = 1; + * } + * Rd.B[x] = res[x]; + * for RV32: x=3...0, + * for RV64: x=7...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KADD8(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("kadd8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.26. KADD8 ===== */ + +/* ===== Inline Function Start for 3.27. KADD16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB + * \brief KADD16 (SIMD 16-bit Signed Saturating Addition) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KADD16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 16-bit signed integer element saturating additions simultaneously. + * + * **Description**:\n + * This instruction adds the 16-bit signed integer elements in Rs1 with the 16-bit signed + * integer elements in Rs2. If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1), + * they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd. + * + * **Operations**:\n + * ~~~ + * res[x] = Rs1.H[x] + Rs2.H[x]; + * if (res[x] > 32767) { + * res[x] = 32767; + * OV = 1; + * } else if (res[x] < -32768) { + * res[x] = -32768; + * OV = 1; + * } + * Rd.H[x] = res[x]; + * for RV32: x=1...0, + * for RV64: x=3...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KADD16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("kadd16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.27. KADD16 ===== */ + +/* ===== Inline Function Start for 3.28. KADD64 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB + * \brief KADD64 (64-bit Signed Saturating Addition) + * \details + * **Type**: DSP (64-bit Profile) + * + * **Syntax**:\n + * ~~~ + * KADD64 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Add two 64-bit signed integers. The result is saturated to the Q63 range. + * + * **RV32 Description**:\n + * This instruction adds the 64-bit signed integer of an even/odd pair of registers + * specified by Rs1(4,1) with the 64-bit signed integer of an even/odd pair of registers specified by + * Rs2(4,1). If the 64-bit result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the + * range and the OV bit is set to 1. The saturated result is written to an even/odd pair of registers + * specified by Rd(4,1). + * Rx(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the register + * pair includes register 2d and 2d+1. + * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register + * of the pair contains the low 32-bit of the result. + * + * **RV64 Description**:\n + * This instruction adds the 64-bit signed integer in Rs1 with the 64-bit signed + * integer in Rs2. If the result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the + * range and the OV bit is set to 1. The saturated result is written to Rd. + * + * **Operations**:\n + * ~~~ + * RV32: + * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1); + * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1); + * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1); + * result = R[a_H].R[a_L] + R[b_H].R[b_L]; + * if (result > (2^63)-1) { + * result = (2^63)-1; OV = 1; + * } else if (result < -2^63) { + * result = -2^63; OV = 1; + * } + * R[t_H].R[t_L] = result; + * RV64: + * result = Rs1 + Rs2; + * if (result > (2^63)-1) { + * result = (2^63)-1; OV = 1; + * } else if (result < -2^63) { + * result = -2^63; OV = 1; + * } + * Rd = result; + * ~~~ + * + * \param [in] a long long type of value stored in a + * \param [in] b long long type of value stored in b + * \return value stored in long long type + */ +__STATIC_FORCEINLINE long long __RV_KADD64(long long a, long long b) +{ + register long long result; + __ASM volatile("kadd64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.28. KADD64 ===== */ + +/* ===== Inline Function Start for 3.29. KADDH ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU + * \brief KADDH (Signed Addition with Q15 Saturation) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * KADDH Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Add the signed lower 32-bit content of two registers with Q15 saturation. + * + * **Description**:\n + * The signed lower 32-bit content of Rs1 is added with the signed lower 32-bit content of + * Rs2. And the result is saturated to the 16-bit signed integer range of [-2^15, 2^15-1] and then sign- + * extended and written to Rd. If saturation happens, this instruction sets the OV flag. + * + * **Operations**:\n + * ~~~ + * tmp = Rs1.W[0] + Rs2.W[0]; + * if (tmp > 32767) { + * res = 32767; + * OV = 1; + * } else if (tmp < -32768) { + * res = -32768; + * OV = 1 + * } else { + * res = tmp; + * } + * Rd = SE(tmp[15:0]); + * ~~~ + * + * \param [in] a int type of value stored in a + * \param [in] b int type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KADDH(int a, int b) +{ + register long result; + __ASM volatile("kaddh %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.29. KADDH ===== */ + +/* ===== Inline Function Start for 3.30. KADDW ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU + * \brief KADDW (Signed Addition with Q31 Saturation) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * KADDW Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Add the lower 32-bit signed content of two registers with Q31 saturation. + * + * **Description**:\n + * The lower 32-bit signed content of Rs1 is added with the lower 32-bit signed content of + * Rs2. And the result is saturated to the 32-bit signed integer range of [-2^31, 2^31-1] and then sign- + * extended and written to Rd. If saturation happens, this instruction sets the OV flag. + * + * **Operations**:\n + * ~~~ + * tmp = Rs1.W[0] + Rs2.W[0]; + * if (tmp > (2^31)-1) { + * res = (2^31)-1; + * OV = 1; + * } else if (tmp < -2^31) { + * res = -2^31; + * OV = 1 + * } else { + * res = tmp; + * } + * Rd = res[31:0]; // RV32 + * Rd = SE(res[31:0]) // RV64 + * ~~~ + * + * \param [in] a int type of value stored in a + * \param [in] b int type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KADDW(int a, int b) +{ + register long result; + __ASM volatile("kaddw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.30. KADDW ===== */ + +/* ===== Inline Function Start for 3.31. KCRAS16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB + * \brief KCRAS16 (SIMD 16-bit Signed Saturating Cross Addition & Subtraction) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KCRAS16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 16-bit signed integer element saturating addition and 16-bit signed integer element + * saturating subtraction in a 32-bit chunk simultaneously. Operands are from crossed positions in 32- + * bit chunks. + * + * **Description**:\n + * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in + * Rs1 with the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2; at the same time, it + * subtracts the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2 from the 16-bit signed + * integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the Q15 number + * range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated + * results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit chunks in Rd for + * subtraction. + * + * **Operations**:\n + * ~~~ + * res1 = Rs1.W[x][31:16] + Rs2.W[x][15:0]; + * res2 = Rs1.W[x][15:0] - Rs2.W[x][31:16]; + * for (res in [res1, res2]) { + * if (res > (2^15)-1) { + * res = (2^15)-1; + * OV = 1; + * } else if (res < -2^15) { + * res = -2^15; + * OV = 1; + * } + * } + * Rd.W[x][31:16] = res1; + * Rd.W[x][15:0] = res2; + * for RV32, x=0 + * for RV64, x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KCRAS16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("kcras16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.31. KCRAS16 ===== */ + +/* ===== Inline Function Start for 3.32. KCRSA16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB + * \brief KCRSA16 (SIMD 16-bit Signed Saturating Cross Subtraction & Addition) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KCRSA16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 16-bit signed integer element saturating subtraction and 16-bit signed integer element + * saturating addition in a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit + * chunks. + * + * **Description**:\n + * This instruction subtracts the 16-bit signed integer element in [15:0] of 32-bit chunks + * in Rs2 from the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1; at the same time, it + * adds the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2 with the 16-bit signed + * integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the Q15 number + * range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated + * results are written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of 32-bit chunks in Rd + * for addition. + * + * **Operations**:\n + * ~~~ + * res1 = Rs1.W[x][31:16] - Rs2.W[x][15:0]; + * res2 = Rs1.W[x][15:0] + Rs2.W[x][31:16]; + * for (res in [res1, res2]) { + * if (res > (2^15)-1) { + * res = (2^15)-1; + * OV = 1; + * } else if (res < -2^15) { + * res = -2^15; + * OV = 1; + * } + * } + * Rd.W[x][31:16] = res1; + * Rd.W[x][15:0] = res2; + * for RV32, x=0 + * for RV64, x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KCRSA16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("kcrsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.32. KCRSA16 ===== */ + +/* ===== Inline Function Start for 3.33.1. KDMBB ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU + * \brief KDMBB (Signed Saturating Double Multiply B16 x B16) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * KDMxy Rd, Rs1, Rs2 (xy = BB, BT, TT) + * ~~~ + * + * **Purpose**:\n + * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion + * of the lower 32-bit chunk in registers and then double and saturate the Q31 result. The result is + * written into the destination register for RV32 or sign-extended to 64-bits and written into the + * destination register for RV64. If saturation happens, an overflow flag OV will be set. + * + * **Description**:\n + * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with + * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then + * doubled and saturated into a Q31 value. The Q31 value is then written into Rd (sign-extended in + * RV64). When both the two Q15 inputs are 0x8000, saturation will happen. The result will be + * saturated to 0x7FFFFFFF and the overflow flag OV will be set. + * + * **Operations**:\n + * ~~~ + * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMBB + * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMBT + * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMTT + * If (0x8000 != aop | 0x8000 != bop) { + * Mresult = aop * bop; + * resQ31 = Mresult << 1; + * Rd = resQ31; // RV32 + * Rd = SE(resQ31); // RV64 + * } else { + * resQ31 = 0x7FFFFFFF; + * Rd = resQ31; // RV32 + * Rd = SE(resQ31); // RV64 + * OV = 1; + * } + * ~~~ + * + * \param [in] a unsigned int type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KDMBB(unsigned int a, unsigned int b) +{ + register long result; + __ASM volatile("kdmbb %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.33.1. KDMBB ===== */ + +/* ===== Inline Function Start for 3.33.2. KDMBT ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU + * \brief KDMBT (Signed Saturating Double Multiply B16 x T16) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * KDMxy Rd, Rs1, Rs2 (xy = BB, BT, TT) + * ~~~ + * + * **Purpose**:\n + * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion + * of the lower 32-bit chunk in registers and then double and saturate the Q31 result. The result is + * written into the destination register for RV32 or sign-extended to 64-bits and written into the + * destination register for RV64. If saturation happens, an overflow flag OV will be set. + * + * **Description**:\n + * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with + * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then + * doubled and saturated into a Q31 value. The Q31 value is then written into Rd (sign-extended in + * RV64). When both the two Q15 inputs are 0x8000, saturation will happen. The result will be + * saturated to 0x7FFFFFFF and the overflow flag OV will be set. + * + * **Operations**:\n + * ~~~ + * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMBB + * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMBT + * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMTT + * If (0x8000 != aop | 0x8000 != bop) { + * Mresult = aop * bop; + * resQ31 = Mresult << 1; + * Rd = resQ31; // RV32 + * Rd = SE(resQ31); // RV64 + * } else { + * resQ31 = 0x7FFFFFFF; + * Rd = resQ31; // RV32 + * Rd = SE(resQ31); // RV64 + * OV = 1; + * } + * ~~~ + * + * \param [in] a unsigned int type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KDMBT(unsigned int a, unsigned int b) +{ + register long result; + __ASM volatile("kdmbt %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.33.2. KDMBT ===== */ + +/* ===== Inline Function Start for 3.33.3. KDMTT ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU + * \brief KDMTT (Signed Saturating Double Multiply T16 x T16) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * KDMxy Rd, Rs1, Rs2 (xy = BB, BT, TT) + * ~~~ + * + * **Purpose**:\n + * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion + * of the lower 32-bit chunk in registers and then double and saturate the Q31 result. The result is + * written into the destination register for RV32 or sign-extended to 64-bits and written into the + * destination register for RV64. If saturation happens, an overflow flag OV will be set. + * + * **Description**:\n + * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with + * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then + * doubled and saturated into a Q31 value. The Q31 value is then written into Rd (sign-extended in + * RV64). When both the two Q15 inputs are 0x8000, saturation will happen. The result will be + * saturated to 0x7FFFFFFF and the overflow flag OV will be set. + * + * **Operations**:\n + * ~~~ + * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMBB + * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMBT + * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMTT + * If (0x8000 != aop | 0x8000 != bop) { + * Mresult = aop * bop; + * resQ31 = Mresult << 1; + * Rd = resQ31; // RV32 + * Rd = SE(resQ31); // RV64 + * } else { + * resQ31 = 0x7FFFFFFF; + * Rd = resQ31; // RV32 + * Rd = SE(resQ31); // RV64 + * OV = 1; + * } + * ~~~ + * + * \param [in] a unsigned int type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KDMTT(unsigned int a, unsigned int b) +{ + register long result; + __ASM volatile("kdmtt %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.33.3. KDMTT ===== */ + +/* ===== Inline Function Start for 3.34.1. KDMABB ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU + * \brief KDMABB (Signed Saturating Double Multiply Addition B16 x B16) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * KDMAxy Rd, Rs1, Rs2 (xy = BB, BT, TT) + * ~~~ + * + * **Purpose**:\n + * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion + * of the lower 32-bit chunk in registers and then double and saturate the Q31 result, add the result + * with the sign-extended lower 32-bit chunk destination register and write the saturated addition + * result into the destination register. If saturation happens, an overflow flag OV will be set. + * + * **Description**:\n + * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with + * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then + * doubled and saturated into a Q31 value. The Q31 value is then added with the content of Rd. If the + * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and + * the OV flag is set to 1. The result after saturation is written to Rd. + * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be + * set. + * + * **Operations**:\n + * ~~~ + * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMABB + * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMABT + * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMATT + * If (0x8000 != aop | 0x8000 != bop) { + * Mresult = aop * bop; + * resQ31 = Mresult << 1; + * } else { + * resQ31 = 0x7FFFFFFF; + * OV = 1; + * } + * resadd = Rd + resQ31; // RV32 + * resadd = Rd.W[0] + resQ31; // RV64 + * if (resadd > (2^31)-1) { + * resadd = (2^31)-1; + * OV = 1; + * } else if (resadd < -2^31) { + * resadd = -2^31; + * OV = 1; + * } + * Rd = resadd; // RV32 + * Rd = SE(resadd); // RV64 + * ~~~ + * + * \param [in] t long type of value stored in t + * \param [in] a unsigned int type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KDMABB(long t, unsigned int a, unsigned int b) +{ + __ASM volatile("kdmabb %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.34.1. KDMABB ===== */ + +/* ===== Inline Function Start for 3.34.2. KDMABT ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU + * \brief KDMABT (Signed Saturating Double Multiply Addition B16 x T16) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * KDMAxy Rd, Rs1, Rs2 (xy = BB, BT, TT) + * ~~~ + * + * **Purpose**:\n + * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion + * of the lower 32-bit chunk in registers and then double and saturate the Q31 result, add the result + * with the sign-extended lower 32-bit chunk destination register and write the saturated addition + * result into the destination register. If saturation happens, an overflow flag OV will be set. + * + * **Description**:\n + * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with + * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then + * doubled and saturated into a Q31 value. The Q31 value is then added with the content of Rd. If the + * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and + * the OV flag is set to 1. The result after saturation is written to Rd. + * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be + * set. + * + * **Operations**:\n + * ~~~ + * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMABB + * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMABT + * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMATT + * If (0x8000 != aop | 0x8000 != bop) { + * Mresult = aop * bop; + * resQ31 = Mresult << 1; + * } else { + * resQ31 = 0x7FFFFFFF; + * OV = 1; + * } + * resadd = Rd + resQ31; // RV32 + * resadd = Rd.W[0] + resQ31; // RV64 + * if (resadd > (2^31)-1) { + * resadd = (2^31)-1; + * OV = 1; + * } else if (resadd < -2^31) { + * resadd = -2^31; + * OV = 1; + * } + * Rd = resadd; // RV32 + * Rd = SE(resadd); // RV64 + * ~~~ + * + * \param [in] t long type of value stored in t + * \param [in] a unsigned int type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KDMABT(long t, unsigned int a, unsigned int b) +{ + __ASM volatile("kdmabt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.34.2. KDMABT ===== */ + +/* ===== Inline Function Start for 3.34.3. KDMATT ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU + * \brief KDMATT (Signed Saturating Double Multiply Addition T16 x T16) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * KDMAxy Rd, Rs1, Rs2 (xy = BB, BT, TT) + * ~~~ + * + * **Purpose**:\n + * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion + * of the lower 32-bit chunk in registers and then double and saturate the Q31 result, add the result + * with the sign-extended lower 32-bit chunk destination register and write the saturated addition + * result into the destination register. If saturation happens, an overflow flag OV will be set. + * + * **Description**:\n + * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with + * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then + * doubled and saturated into a Q31 value. The Q31 value is then added with the content of Rd. If the + * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and + * the OV flag is set to 1. The result after saturation is written to Rd. + * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be + * set. + * + * **Operations**:\n + * ~~~ + * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMABB + * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMABT + * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMATT + * If (0x8000 != aop | 0x8000 != bop) { + * Mresult = aop * bop; + * resQ31 = Mresult << 1; + * } else { + * resQ31 = 0x7FFFFFFF; + * OV = 1; + * } + * resadd = Rd + resQ31; // RV32 + * resadd = Rd.W[0] + resQ31; // RV64 + * if (resadd > (2^31)-1) { + * resadd = (2^31)-1; + * OV = 1; + * } else if (resadd < -2^31) { + * resadd = -2^31; + * OV = 1; + * } + * Rd = resadd; // RV32 + * Rd = SE(resadd); // RV64 + * ~~~ + * + * \param [in] t long type of value stored in t + * \param [in] a unsigned int type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KDMATT(long t, unsigned int a, unsigned int b) +{ + __ASM volatile("kdmatt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.34.3. KDMATT ===== */ + +/* ===== Inline Function Start for 3.35.1. KHM8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY + * \brief KHM8 (SIMD Signed Saturating Q7 Multiply) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KHM8 Rd, Rs1, Rs2 + * KHMX8 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do Q7xQ7 element multiplications simultaneously. The Q14 results are then reduced to Q7 + * numbers again. + * + * **Description**:\n + * For the `KHM8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1 + * with the top 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7 + * content of 16-bit chunks in Rs1 with the bottom 8-bit Q7 content of 16-bit chunks in Rs2. + * For the `KHMX16` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1 with the + * bottom 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7 + * content of 16-bit chunks in Rs1 with the top 8-bit Q7 content of 16-bit chunks in Rs2. + * The Q14 results are then right-shifted 7-bits and saturated into Q7 values. The Q7 results are then + * written into Rd. When both the two Q7 inputs of a multiplication are 0x80, saturation will happen. + * The result will be saturated to 0x7F and the overflow flag OV will be set. + * + * **Operations**:\n + * ~~~ + * if (is `KHM8`) { + * op1t = Rs1.B[x+1]; op2t = Rs2.B[x+1]; // top + * op1b = Rs1.B[x]; op2b = Rs2.B[x]; // bottom + * } else if (is `KHMX8`) { + * op1t = Rs1.H[x+1]; op2t = Rs2.H[x]; // Rs1 top + * op1b = Rs1.H[x]; op2b = Rs2.H[x+1]; // Rs1 bottom + * } + * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) { + * if (0x80 != aop | 0x80 != bop) { + * res = (aop s* bop) >> 7; + * } else { + * res= 0x7F; + * OV = 1; + * } + * } + * Rd.H[x/2] = concat(rest, resb); + * for RV32, x=0,2 + * for RV64, x=0,2,4,6 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KHM8(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("khm8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.35.1. KHM8 ===== */ + +/* ===== Inline Function Start for 3.35.2. KHMX8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY + * \brief KHMX8 (SIMD Signed Saturating Crossed Q7 Multiply) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KHM8 Rd, Rs1, Rs2 + * KHMX8 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do Q7xQ7 element multiplications simultaneously. The Q14 results are then reduced to Q7 + * numbers again. + * + * **Description**:\n + * For the `KHM8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1 + * with the top 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7 + * content of 16-bit chunks in Rs1 with the bottom 8-bit Q7 content of 16-bit chunks in Rs2. + * For the `KHMX16` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1 with the + * bottom 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7 + * content of 16-bit chunks in Rs1 with the top 8-bit Q7 content of 16-bit chunks in Rs2. + * The Q14 results are then right-shifted 7-bits and saturated into Q7 values. The Q7 results are then + * written into Rd. When both the two Q7 inputs of a multiplication are 0x80, saturation will happen. + * The result will be saturated to 0x7F and the overflow flag OV will be set. + * + * **Operations**:\n + * ~~~ + * if (is `KHM8`) { + * op1t = Rs1.B[x+1]; op2t = Rs2.B[x+1]; // top + * op1b = Rs1.B[x]; op2b = Rs2.B[x]; // bottom + * } else if (is `KHMX8`) { + * op1t = Rs1.H[x+1]; op2t = Rs2.H[x]; // Rs1 top + * op1b = Rs1.H[x]; op2b = Rs2.H[x+1]; // Rs1 bottom + * } + * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) { + * if (0x80 != aop | 0x80 != bop) { + * res = (aop s* bop) >> 7; + * } else { + * res= 0x7F; + * OV = 1; + * } + * } + * Rd.H[x/2] = concat(rest, resb); + * for RV32, x=0,2 + * for RV64, x=0,2,4,6 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KHMX8(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("khmx8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.35.2. KHMX8 ===== */ + +/* ===== Inline Function Start for 3.36.1. KHM16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY + * \brief KHM16 (SIMD Signed Saturating Q15 Multiply) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KHM16 Rd, Rs1, Rs2 + * KHMX16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do Q15xQ15 element multiplications simultaneously. The Q30 results are then reduced to + * Q15 numbers again. + * + * **Description**:\n + * For the `KHM16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in + * Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom + * 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content of 32-bit chunks in + * Rs2. + * For the `KHMX16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the + * bottom 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom 16-bit Q15 + * content of 32-bit chunks in Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2. + * The Q30 results are then right-shifted 15-bits and saturated into Q15 values. The Q15 results are + * then written into Rd. When both the two Q15 inputs of a multiplication are 0x8000, saturation will + * happen. The result will be saturated to 0x7FFF and the overflow flag OV will be set. + * + * **Operations**:\n + * ~~~ + * if (is `KHM16`) { + * op1t = Rs1.H[x+1]; op2t = Rs2.H[x+1]; // top + * op1b = Rs1.H[x]; op2b = Rs2.H[x]; // bottom + * } else if (is `KHMX16`) { + * op1t = Rs1.H[x+1]; op2t = Rs2.H[x]; // Rs1 top + * op1b = Rs1.H[x]; op2b = Rs2.H[x+1]; // Rs1 bottom + * } + * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) { + * if (0x8000 != aop | 0x8000 != bop) { + * res = (aop s* bop) >> 15; + * } else { + * res= 0x7FFF; + * OV = 1; + * } + * } + * Rd.W[x/2] = concat(rest, resb); + * for RV32: x=0 + * for RV64: x=0,2 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KHM16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("khm16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.36.1. KHM16 ===== */ + +/* ===== Inline Function Start for 3.36.2. KHMX16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY + * \brief KHMX16 (SIMD Signed Saturating Crossed Q15 Multiply) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KHM16 Rd, Rs1, Rs2 + * KHMX16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do Q15xQ15 element multiplications simultaneously. The Q30 results are then reduced to + * Q15 numbers again. + * + * **Description**:\n + * For the `KHM16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in + * Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom + * 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content of 32-bit chunks in + * Rs2. + * For the `KHMX16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the + * bottom 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom 16-bit Q15 + * content of 32-bit chunks in Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2. + * The Q30 results are then right-shifted 15-bits and saturated into Q15 values. The Q15 results are + * then written into Rd. When both the two Q15 inputs of a multiplication are 0x8000, saturation will + * happen. The result will be saturated to 0x7FFF and the overflow flag OV will be set. + * + * **Operations**:\n + * ~~~ + * if (is `KHM16`) { + * op1t = Rs1.H[x+1]; op2t = Rs2.H[x+1]; // top + * op1b = Rs1.H[x]; op2b = Rs2.H[x]; // bottom + * } else if (is `KHMX16`) { + * op1t = Rs1.H[x+1]; op2t = Rs2.H[x]; // Rs1 top + * op1b = Rs1.H[x]; op2b = Rs2.H[x+1]; // Rs1 bottom + * } + * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) { + * if (0x8000 != aop | 0x8000 != bop) { + * res = (aop s* bop) >> 15; + * } else { + * res= 0x7FFF; + * OV = 1; + * } + * } + * Rd.W[x/2] = concat(rest, resb); + * for RV32: x=0 + * for RV64: x=0,2 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KHMX16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("khmx16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.36.2. KHMX16 ===== */ + +/* ===== Inline Function Start for 3.37.1. KHMBB ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU + * \brief KHMBB (Signed Saturating Half Multiply B16 x B16) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * KHMxy Rd, Rs1, Rs2 (xy = BB, BT, TT) + * ~~~ + * + * **Purpose**:\n + * Multiply the signed Q15 number contents of two 16-bit data in the corresponding portion + * of the lower 32-bit chunk in registers and then right-shift 15 bits to turn the Q30 result into a Q15 + * number again and saturate the Q15 result into the destination register. If saturation happens, an + * overflow flag OV will be set. + * + * **Description**:\n + * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with + * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then right- + * shifted 15-bits and saturated into a Q15 value. The Q15 value is then sing-extended and written into + * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated + * to 0x7FFF and the overflow flag OV will be set. + * + * **Operations**:\n + * ~~~ + * aop = Rs1.H[0]; bop = Rs2.H[0]; // KHMBB + * aop = Rs1.H[0]; bop = Rs2.H[1]; // KHMBT + * aop = Rs1.H[1]; bop = Rs2.H[1]; // KHMTT + * If (0x8000 != aop | 0x8000 != bop) { + * Mresult[31:0] = aop * bop; + * res[15:0] = Mresult[30:15]; + * } else { + * res[15:0] = 0x7FFF; + * OV = 1; + * } + * Rd = SE32(res[15:0]); // Rv32 + * Rd = SE64(res[15:0]); // RV64 + * ~~~ + * + * \param [in] a unsigned int type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KHMBB(unsigned int a, unsigned int b) +{ + register long result; + __ASM volatile("khmbb %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.37.1. KHMBB ===== */ + +/* ===== Inline Function Start for 3.37.2. KHMBT ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU + * \brief KHMBT (Signed Saturating Half Multiply B16 x T16) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * KHMxy Rd, Rs1, Rs2 (xy = BB, BT, TT) + * ~~~ + * + * **Purpose**:\n + * Multiply the signed Q15 number contents of two 16-bit data in the corresponding portion + * of the lower 32-bit chunk in registers and then right-shift 15 bits to turn the Q30 result into a Q15 + * number again and saturate the Q15 result into the destination register. If saturation happens, an + * overflow flag OV will be set. + * + * **Description**:\n + * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with + * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then right- + * shifted 15-bits and saturated into a Q15 value. The Q15 value is then sing-extended and written into + * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated + * to 0x7FFF and the overflow flag OV will be set. + * + * **Operations**:\n + * ~~~ + * aop = Rs1.H[0]; bop = Rs2.H[0]; // KHMBB + * aop = Rs1.H[0]; bop = Rs2.H[1]; // KHMBT + * aop = Rs1.H[1]; bop = Rs2.H[1]; // KHMTT + * If (0x8000 != aop | 0x8000 != bop) { + * Mresult[31:0] = aop * bop; + * res[15:0] = Mresult[30:15]; + * } else { + * res[15:0] = 0x7FFF; + * OV = 1; + * } + * Rd = SE32(res[15:0]); // Rv32 + * Rd = SE64(res[15:0]); // RV64 + * ~~~ + * + * \param [in] a unsigned int type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KHMBT(unsigned int a, unsigned int b) +{ + register long result; + __ASM volatile("khmbt %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.37.2. KHMBT ===== */ + +/* ===== Inline Function Start for 3.37.3. KHMTT ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU + * \brief KHMTT (Signed Saturating Half Multiply T16 x T16) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * KHMxy Rd, Rs1, Rs2 (xy = BB, BT, TT) + * ~~~ + * + * **Purpose**:\n + * Multiply the signed Q15 number contents of two 16-bit data in the corresponding portion + * of the lower 32-bit chunk in registers and then right-shift 15 bits to turn the Q30 result into a Q15 + * number again and saturate the Q15 result into the destination register. If saturation happens, an + * overflow flag OV will be set. + * + * **Description**:\n + * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with + * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then right- + * shifted 15-bits and saturated into a Q15 value. The Q15 value is then sing-extended and written into + * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated + * to 0x7FFF and the overflow flag OV will be set. + * + * **Operations**:\n + * ~~~ + * aop = Rs1.H[0]; bop = Rs2.H[0]; // KHMBB + * aop = Rs1.H[0]; bop = Rs2.H[1]; // KHMBT + * aop = Rs1.H[1]; bop = Rs2.H[1]; // KHMTT + * If (0x8000 != aop | 0x8000 != bop) { + * Mresult[31:0] = aop * bop; + * res[15:0] = Mresult[30:15]; + * } else { + * res[15:0] = 0x7FFF; + * OV = 1; + * } + * Rd = SE32(res[15:0]); // Rv32 + * Rd = SE64(res[15:0]); // RV64 + * ~~~ + * + * \param [in] a unsigned int type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KHMTT(unsigned int a, unsigned int b) +{ + register long result; + __ASM volatile("khmtt %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.37.3. KHMTT ===== */ + +/* ===== Inline Function Start for 3.38.1. KMABB ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB + * \brief KMABB (SIMD Saturating Signed Multiply Bottom Halfs & Add) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KMABB Rd, Rs1, Rs2 + * KMABT Rd, Rs1, Rs2 + * KMATT Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the signed 16-bit content of 32-bit elements in a register with the 16-bit content + * of 32-bit elements in another register and add the result to the content of 32-bit elements in the + * third register. The addition result may be saturated and is written to the third register. + * * KMABB: rd.W[x] + bottom*bottom (per 32-bit element) + * * KMABT rd.W[x] + bottom*top (per 32-bit element) + * * KMATT rd.W[x] + top*top (per 32-bit element) + * + * **Description**:\n + * For the `KMABB` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with + * the bottom 16-bit content of 32-bit elements in Rs2. + * For the `KMABT` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with + * the top 16-bit content of 32-bit elements in Rs2. + * For the `KMATT` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the + * top 16-bit content of 32-bit elements in Rs2. + * The multiplication result is added to the content of 32-bit elements in Rd. If the addition result is + * beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to + * 1. The results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as + * signed integers. + * + * **Operations**:\n + * ~~~ + * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]); // KMABB + * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[1]); // KMABT + * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]); // KMATT + * if (res[x] > (2^31)-1) { + * res[x] = (2^31)-1; + * OV = 1; + * } else if (res[x] < -2^31) { + * res[x] = -2^31; + * OV = 1; + * } + * Rd.W[x] = res[x]; + * for RV32: x=0 + * for RV64: x=1...0 + * ~~~ + * + * \param [in] t long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMABB(long t, unsigned long a, unsigned long b) +{ + __ASM volatile("kmabb %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.38.1. KMABB ===== */ + +/* ===== Inline Function Start for 3.38.2. KMABT ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB + * \brief KMABT (SIMD Saturating Signed Multiply Bottom & Top Halfs & Add) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KMABB Rd, Rs1, Rs2 + * KMABT Rd, Rs1, Rs2 + * KMATT Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the signed 16-bit content of 32-bit elements in a register with the 16-bit content + * of 32-bit elements in another register and add the result to the content of 32-bit elements in the + * third register. The addition result may be saturated and is written to the third register. + * * KMABB: rd.W[x] + bottom*bottom (per 32-bit element) + * * KMABT rd.W[x] + bottom*top (per 32-bit element) + * * KMATT rd.W[x] + top*top (per 32-bit element) + * + * **Description**:\n + * For the `KMABB` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with + * the bottom 16-bit content of 32-bit elements in Rs2. + * For the `KMABT` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with + * the top 16-bit content of 32-bit elements in Rs2. + * For the `KMATT` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the + * top 16-bit content of 32-bit elements in Rs2. + * The multiplication result is added to the content of 32-bit elements in Rd. If the addition result is + * beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to + * 1. The results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as + * signed integers. + * + * **Operations**:\n + * ~~~ + * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]); // KMABB + * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[1]); // KMABT + * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]); // KMATT + * if (res[x] > (2^31)-1) { + * res[x] = (2^31)-1; + * OV = 1; + * } else if (res[x] < -2^31) { + * res[x] = -2^31; + * OV = 1; + * } + * Rd.W[x] = res[x]; + * for RV32: x=0 + * for RV64: x=1...0 + * ~~~ + * + * \param [in] t long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMABT(long t, unsigned long a, unsigned long b) +{ + __ASM volatile("kmabt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.38.2. KMABT ===== */ + +/* ===== Inline Function Start for 3.38.3. KMATT ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB + * \brief KMATT (SIMD Saturating Signed Multiply Top Halfs & Add) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KMABB Rd, Rs1, Rs2 + * KMABT Rd, Rs1, Rs2 + * KMATT Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the signed 16-bit content of 32-bit elements in a register with the 16-bit content + * of 32-bit elements in another register and add the result to the content of 32-bit elements in the + * third register. The addition result may be saturated and is written to the third register. + * * KMABB: rd.W[x] + bottom*bottom (per 32-bit element) + * * KMABT rd.W[x] + bottom*top (per 32-bit element) + * * KMATT rd.W[x] + top*top (per 32-bit element) + * + * **Description**:\n + * For the `KMABB` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with + * the bottom 16-bit content of 32-bit elements in Rs2. + * For the `KMABT` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with + * the top 16-bit content of 32-bit elements in Rs2. + * For the `KMATT` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the + * top 16-bit content of 32-bit elements in Rs2. + * The multiplication result is added to the content of 32-bit elements in Rd. If the addition result is + * beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to + * 1. The results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as + * signed integers. + * + * **Operations**:\n + * ~~~ + * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]); // KMABB + * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[1]); // KMABT + * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]); // KMATT + * if (res[x] > (2^31)-1) { + * res[x] = (2^31)-1; + * OV = 1; + * } else if (res[x] < -2^31) { + * res[x] = -2^31; + * OV = 1; + * } + * Rd.W[x] = res[x]; + * for RV32: x=0 + * for RV64: x=1...0 + * ~~~ + * + * \param [in] t long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMATT(long t, unsigned long a, unsigned long b) +{ + __ASM volatile("kmatt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.38.3. KMATT ===== */ + +/* ===== Inline Function Start for 3.39.1. KMADA ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB + * \brief KMADA (SIMD Saturating Signed Multiply Two Halfs and Two Adds) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KMADA Rd, Rs1, Rs2 + * KMAXDA Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then adds + * the two 32-bit results and 32-bit elements in a third register together. The addition result may be + * saturated. + * * KMADA: rd.W[x] + top*top + bottom*bottom (per 32-bit element) + * * KMAXDA: rd.W[x] + top*bottom + bottom*top (per 32-bit element) + * + * **Description**:\n + * For the `KMADA instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with + * the bottom 16-bit content of 32-bit elements in Rs2 and then adds the result to the result of + * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit + * elements in Rs2. + * For the `KMAXDA` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the + * bottom 16-bit content of 32-bit elements in Rs2 and then adds the result to the result of multiplying + * the bottom 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit elements in + * Rs2. + * The result is added to the content of 32-bit elements in Rd. If the addition result is beyond the Q31 + * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The 32-bit + * results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed + * integers. + * + * **Operations**:\n + * ~~~ + * // KMADA + * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) + (Rs1.W[x].H[0] * Rs2.W[x].H[0]); + * // KMAXDA + * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) + (Rs1.W[x].H[0] * Rs2.W[x].H[1]); + * if (res[x] > (2^31)-1) { + * res[x] = (2^31)-1; + * OV = 1; + * } else if (res[x] < -2^31) { + * res[x] = -2^31; + * OV = 1; + * } + * Rd.W[x] = res[x]; + * for RV32: x=0 + * for RV64: x=1...0 + * ~~~ + * + * \param [in] t long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMADA(long t, unsigned long a, unsigned long b) +{ + __ASM volatile("kmada %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.39.1. KMADA ===== */ + +/* ===== Inline Function Start for 3.39.2. KMAXDA ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB + * \brief KMAXDA (SIMD Saturating Signed Crossed Multiply Two Halfs and Two Adds) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KMADA Rd, Rs1, Rs2 + * KMAXDA Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then adds + * the two 32-bit results and 32-bit elements in a third register together. The addition result may be + * saturated. + * * KMADA: rd.W[x] + top*top + bottom*bottom (per 32-bit element) + * * KMAXDA: rd.W[x] + top*bottom + bottom*top (per 32-bit element) + * + * **Description**:\n + * For the `KMADA instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with + * the bottom 16-bit content of 32-bit elements in Rs2 and then adds the result to the result of + * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit + * elements in Rs2. + * For the `KMAXDA` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the + * bottom 16-bit content of 32-bit elements in Rs2 and then adds the result to the result of multiplying + * the bottom 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit elements in + * Rs2. + * The result is added to the content of 32-bit elements in Rd. If the addition result is beyond the Q31 + * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The 32-bit + * results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed + * integers. + * + * **Operations**:\n + * ~~~ + * // KMADA + * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) + (Rs1.W[x].H[0] * Rs2.W[x].H[0]); + * // KMAXDA + * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) + (Rs1.W[x].H[0] * Rs2.W[x].H[1]); + * if (res[x] > (2^31)-1) { + * res[x] = (2^31)-1; + * OV = 1; + * } else if (res[x] < -2^31) { + * res[x] = -2^31; + * OV = 1; + * } + * Rd.W[x] = res[x]; + * for RV32: x=0 + * for RV64: x=1...0 + * ~~~ + * + * \param [in] t long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMAXDA(long t, unsigned long a, unsigned long b) +{ + __ASM volatile("kmaxda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.39.2. KMAXDA ===== */ + +/* ===== Inline Function Start for 3.40.1. KMADS ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB + * \brief KMADS (SIMD Saturating Signed Multiply Two Halfs & Subtract & Add) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KMADS Rd, Rs1, Rs2 + * KMADRS Rd, Rs1, Rs2 + * KMAXDS Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then + * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to + * the corresponding 32-bit elements in a third register. The addition result may be saturated. + * * KMADS: rd.W[x] + (top*top - bottom*bottom) (per 32-bit element) + * * KMADRS: rd.W[x] + (bottom*bottom - top*top) (per 32-bit element) + * * KMAXDS: rd.W[x] + (top*bottom - bottom*top) (per 32-bit element) + * + * **Description**:\n + * For the `KMADS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with + * the bottom 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of + * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit + * elements in Rs2. + * For the `KMADRS` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the + * top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of + * multiplying the bottom 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32- + * bit elements in Rs2. + * For the `KMAXDS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with + * the top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of + * multiplying the top 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-bit + * elements in Rs2. + * The subtraction result is then added to the content of the corresponding 32-bit elements in Rd. If the + * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and + * the OV bit is set to 1. The 32-bit results after saturation are written to Rd. The 16-bit contents of Rs1 + * and Rs2 are treated as signed integers. + * + * **Operations**:\n + * ~~~ + * // KMADS + * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]); + * // KMADRS + * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]); + * // KMAXDS + * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]); + * if (res[x] > (2^31)-1) { + * res[x] = (2^31)-1; + * OV = 1; + * } else if (res[x] < -2^31) { + * res[x] = -2^31; + * OV = 1; + * } + * Rd.W[x] = res[x]; + * for RV32: x=0 + * for RV64: x=1...0 + * ~~~ + * + * \param [in] t long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMADS(long t, unsigned long a, unsigned long b) +{ + __ASM volatile("kmads %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.40.1. KMADS ===== */ + +/* ===== Inline Function Start for 3.40.2. KMADRS ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB + * \brief KMADRS (SIMD Saturating Signed Multiply Two Halfs & Reverse Subtract & Add) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KMADS Rd, Rs1, Rs2 + * KMADRS Rd, Rs1, Rs2 + * KMAXDS Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then + * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to + * the corresponding 32-bit elements in a third register. The addition result may be saturated. + * * KMADS: rd.W[x] + (top*top - bottom*bottom) (per 32-bit element) + * * KMADRS: rd.W[x] + (bottom*bottom - top*top) (per 32-bit element) + * * KMAXDS: rd.W[x] + (top*bottom - bottom*top) (per 32-bit element) + * + * **Description**:\n + * For the `KMADS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with + * the bottom 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of + * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit + * elements in Rs2. + * For the `KMADRS` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the + * top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of + * multiplying the bottom 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32- + * bit elements in Rs2. + * For the `KMAXDS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with + * the top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of + * multiplying the top 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-bit + * elements in Rs2. + * The subtraction result is then added to the content of the corresponding 32-bit elements in Rd. If the + * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and + * the OV bit is set to 1. The 32-bit results after saturation are written to Rd. The 16-bit contents of Rs1 + * and Rs2 are treated as signed integers. + * + * **Operations**:\n + * ~~~ + * // KMADS + * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]); + * // KMADRS + * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]); + * // KMAXDS + * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]); + * if (res[x] > (2^31)-1) { + * res[x] = (2^31)-1; + * OV = 1; + * } else if (res[x] < -2^31) { + * res[x] = -2^31; + * OV = 1; + * } + * Rd.W[x] = res[x]; + * for RV32: x=0 + * for RV64: x=1...0 + * ~~~ + * + * \param [in] t long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMADRS(long t, unsigned long a, unsigned long b) +{ + __ASM volatile("kmadrs %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.40.2. KMADRS ===== */ + +/* ===== Inline Function Start for 3.40.3. KMAXDS ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB + * \brief KMAXDS (SIMD Saturating Signed Crossed Multiply Two Halfs & Subtract & Add) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KMADS Rd, Rs1, Rs2 + * KMADRS Rd, Rs1, Rs2 + * KMAXDS Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then + * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to + * the corresponding 32-bit elements in a third register. The addition result may be saturated. + * * KMADS: rd.W[x] + (top*top - bottom*bottom) (per 32-bit element) + * * KMADRS: rd.W[x] + (bottom*bottom - top*top) (per 32-bit element) + * * KMAXDS: rd.W[x] + (top*bottom - bottom*top) (per 32-bit element) + * + * **Description**:\n + * For the `KMADS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with + * the bottom 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of + * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit + * elements in Rs2. + * For the `KMADRS` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the + * top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of + * multiplying the bottom 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32- + * bit elements in Rs2. + * For the `KMAXDS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with + * the top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of + * multiplying the top 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-bit + * elements in Rs2. + * The subtraction result is then added to the content of the corresponding 32-bit elements in Rd. If the + * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and + * the OV bit is set to 1. The 32-bit results after saturation are written to Rd. The 16-bit contents of Rs1 + * and Rs2 are treated as signed integers. + * + * **Operations**:\n + * ~~~ + * // KMADS + * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]); + * // KMADRS + * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]); + * // KMAXDS + * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]); + * if (res[x] > (2^31)-1) { + * res[x] = (2^31)-1; + * OV = 1; + * } else if (res[x] < -2^31) { + * res[x] = -2^31; + * OV = 1; + * } + * Rd.W[x] = res[x]; + * for RV32: x=0 + * for RV64: x=1...0 + * ~~~ + * + * \param [in] t long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMAXDS(long t, unsigned long a, unsigned long b) +{ + __ASM volatile("kmaxds %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.40.3. KMAXDS ===== */ + +/* ===== Inline Function Start for 3.41. KMAR64 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB + * \brief KMAR64 (Signed Multiply and Saturating Add to 64-Bit Data) + * \details + * **Type**: DSP (64-bit Profile) + * + * **Syntax**:\n + * ~~~ + * KMAR64 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the 32-bit signed elements in two registers and add the 64-bit multiplication + * results to the 64-bit signed data of a pair of registers (RV32) or a register (RV64). The result is + * saturated to the Q63 range and written back to the pair of registers (RV32) or the register (RV64). + * + * **RV32 Description**:\n + * This instruction multiplies the 32-bit signed data of Rs1 with that of Rs2. It adds + * the 64-bit multiplication result to the 64-bit signed data of an even/odd pair of registers specified by + * Rd(4,1) with unlimited precision. If the 64-bit addition result is beyond the Q63 number range (-2^63 <= + * Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The saturated result is written back + * to the even/odd pair of registers specified by Rd(4,1). + * Rx(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the register + * pair includes register 2d and 2d+1. + * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register + * of the pair contains the low 32-bit of the result. + * + * **RV64 Description**:\n + * This instruction multiplies the 32-bit signed elements of Rs1 with that of Rs2. It + * adds the 64-bit multiplication results to the 64-bit signed data of Rd with unlimited precision. If the + * 64-bit addition result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range + * and the OV bit is set to 1. The saturated result is written back to Rd. + * + * **Operations**:\n + * ~~~ + * RV32: + * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1); + * result = R[t_H].R[t_L] + (Rs1 * Rs2); + * if (result > (2^63)-1) { + * result = (2^63)-1; OV = 1; + * } else if (result < -2^63) { + * result = -2^63; OV = 1; + * } + * R[t_H].R[t_L] = result; + * RV64: + * // `result` has unlimited precision + * result = Rd + (Rs1.W[0] * Rs2.W[0]) + (Rs1.W[1] * Rs2.W[1]); + * if (result > (2^63)-1) { + * result = (2^63)-1; OV = 1; + * } else if (result < -2^63) { + * result = -2^63; OV = 1; + * } + * Rd = result; + * ~~~ + * + * \param [in] t long long type of value stored in t + * \param [in] a long type of value stored in a + * \param [in] b long type of value stored in b + * \return value stored in long long type + */ +__STATIC_FORCEINLINE long long __RV_KMAR64(long long t, long a, long b) +{ + __ASM volatile("kmar64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.41. KMAR64 ===== */ + +/* ===== Inline Function Start for 3.42.1. KMDA ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB + * \brief KMDA (SIMD Signed Multiply Two Halfs and Add) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KMDA Rd, Rs1, Rs2 + * KMXDA Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then + * adds the two 32-bit results together. The addition result may be saturated. + * * KMDA: top*top + bottom*bottom (per 32-bit element) + * * KMXDA: top*bottom + bottom*top (per 32-bit element) + * + * **Description**:\n + * For the `KMDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 + * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of + * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32- + * bit elements of Rs2. + * For the `KMXDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 + * with the top 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of + * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of the + * 32-bit elements of Rs2. + * The addition result is checked for saturation. If saturation happens, the result is saturated to 2^31-1. + * The final results are written to Rd. The 16-bit contents are treated as signed integers. + * + * **Operations**:\n + * ~~~ + * if Rs1.W[x] != 0x80008000) or (Rs2.W[x] != 0x80008000 { // KMDA Rd.W[x] = Rs1.W[x].H[1] * + * Rs2.W[x].H[1]) + (Rs1.W[x].H[0] * Rs2.W[x].H[0]; // KMXDA Rd.W[x] = Rs1.W[x].H[1] * Rs2.W[x].H[0]) + * + (Rs1.W[x].H[0] * Rs2.W[x].H[1]; } else { Rd.W[x] = 0x7fffffff; OV = 1; } for RV32: x=0 for RV64: + * x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMDA(unsigned long a, unsigned long b) +{ + register long result; + __ASM volatile("kmda %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.42.1. KMDA ===== */ + +/* ===== Inline Function Start for 3.42.2. KMXDA ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB + * \brief KMXDA (SIMD Signed Crossed Multiply Two Halfs and Add) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KMDA Rd, Rs1, Rs2 + * KMXDA Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then + * adds the two 32-bit results together. The addition result may be saturated. + * * KMDA: top*top + bottom*bottom (per 32-bit element) + * * KMXDA: top*bottom + bottom*top (per 32-bit element) + * + * **Description**:\n + * For the `KMDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 + * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of + * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32- + * bit elements of Rs2. + * For the `KMXDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 + * with the top 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of + * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of the + * 32-bit elements of Rs2. + * The addition result is checked for saturation. If saturation happens, the result is saturated to 2^31-1. + * The final results are written to Rd. The 16-bit contents are treated as signed integers. + * + * **Operations**:\n + * ~~~ + * if Rs1.W[x] != 0x80008000) or (Rs2.W[x] != 0x80008000 { // KMDA Rd.W[x] = Rs1.W[x].H[1] * + * Rs2.W[x].H[1]) + (Rs1.W[x].H[0] * Rs2.W[x].H[0]; // KMXDA Rd.W[x] = Rs1.W[x].H[1] * Rs2.W[x].H[0]) + * + (Rs1.W[x].H[0] * Rs2.W[x].H[1]; } else { Rd.W[x] = 0x7fffffff; OV = 1; } for RV32: x=0 for RV64: + * x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMXDA(unsigned long a, unsigned long b) +{ + register long result; + __ASM volatile("kmxda %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.42.2. KMXDA ===== */ + +/* ===== Inline Function Start for 3.43.1. KMMAC ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC + * \brief KMMAC (SIMD Saturating MSW Signed Multiply Word and Add) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KMMAC Rd, Rs1, Rs2 + * KMMAC.u Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the signed 32-bit integer elements of two registers and add the most significant + * 32-bit results with the signed 32-bit integer elements of a third register. The addition results are + * saturated first and then written back to the third register. The `.u` form performs an additional + * rounding up operation on the multiplication results before adding the most significant 32-bit part + * of the results. + * + * **Description**:\n + * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2 + * and adds the most significant 32-bit multiplication results with the signed 32-bit elements of Rd. If + * the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range + * and the OV bit is set to 1. The results after saturation are written to Rd. The `.u` form of the + * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by + * adding a 1 to bit 31 of the results. + * + * **Operations**:\n + * ~~~ + * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x]; + * if (`.u` form) { + * Round[x][32:0] = Mres[x][63:31] + 1; + * res[x] = Rd.W[x] + Round[x][32:1]; + * } else { + * res[x] = Rd.W[x] + Mres[x][63:32]; + * } + * if (res[x] > (2^31)-1) { + * res[x] = (2^31)-1; + * OV = 1; + * } else if (res[x] < -2^31) { + * res[x] = -2^31; + * OV = 1; + * } + * Rd.W[x] = res[x]; + * for RV32: x=0 + * for RV64: x=1...0 + * ~~~ + * + * \param [in] t long type of value stored in t + * \param [in] a long type of value stored in a + * \param [in] b long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMMAC(long t, long a, long b) +{ + __ASM volatile("kmmac %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.43.1. KMMAC ===== */ + +/* ===== Inline Function Start for 3.43.2. KMMAC.u ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC + * \brief KMMAC.u (SIMD Saturating MSW Signed Multiply Word and Add with Rounding) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KMMAC Rd, Rs1, Rs2 + * KMMAC.u Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the signed 32-bit integer elements of two registers and add the most significant + * 32-bit results with the signed 32-bit integer elements of a third register. The addition results are + * saturated first and then written back to the third register. The `.u` form performs an additional + * rounding up operation on the multiplication results before adding the most significant 32-bit part + * of the results. + * + * **Description**:\n + * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2 + * and adds the most significant 32-bit multiplication results with the signed 32-bit elements of Rd. If + * the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range + * and the OV bit is set to 1. The results after saturation are written to Rd. The `.u` form of the + * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by + * adding a 1 to bit 31 of the results. + * + * **Operations**:\n + * ~~~ + * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x]; + * if (`.u` form) { + * Round[x][32:0] = Mres[x][63:31] + 1; + * res[x] = Rd.W[x] + Round[x][32:1]; + * } else { + * res[x] = Rd.W[x] + Mres[x][63:32]; + * } + * if (res[x] > (2^31)-1) { + * res[x] = (2^31)-1; + * OV = 1; + * } else if (res[x] < -2^31) { + * res[x] = -2^31; + * OV = 1; + * } + * Rd.W[x] = res[x]; + * for RV32: x=0 + * for RV64: x=1...0 + * ~~~ + * + * \param [in] t long type of value stored in t + * \param [in] a long type of value stored in a + * \param [in] b long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMMAC_U(long t, long a, long b) +{ + __ASM volatile("kmmac.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.43.2. KMMAC.u ===== */ + +/* ===== Inline Function Start for 3.44.1. KMMAWB ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC + * \brief KMMAWB (SIMD Saturating MSW Signed Multiply Word and Bottom Half and Add) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KMMAWB Rd, Rs1, Rs2 + * KMMAWB.u Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the + * corresponding 32-bit elements of another register and add the most significant 32-bit results with + * the corresponding signed 32-bit elements of a third register. The addition result is written to the + * corresponding 32-bit elements of the third register. The `.u` form rounds up the multiplication + * results from the most significant discarded bit before the addition operations. + * + * **Description**:\n + * This instruction multiplies the signed 32-bit elements of Rs1 with the signed bottom 16-bit content + * of the corresponding 32-bit elements of Rs2 and adds the most significant 32-bit multiplication + * results with the corresponding signed 32-bit elements of Rd. If the addition result is beyond the Q31 + * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The results + * after saturation are written to the corresponding 32-bit elements of Rd. The `.u` form of the + * instruction rounds up the most significant 32-bit of the 48-bit multiplication results by adding a 1 to + * bit 15 of the result before the addition operations. + * + * **Operations**:\n + * ~~~ + * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[0]; + * if (`.u` form) { + * Round[x][32:0] = Mres[x][47:15] + 1; + * res[x] = Rd.W[x] + Round[x][32:1]; + * } else { + * res[x] = Rd.W[x] + Mres[x][47:16]; + * } + * if (res[x] > (2^31)-1) { + * res[x] = (2^31)-1; + * OV = 1; + * } else if (res[x] < -2^31) { + * res[x] = -2^31; + * OV = 1; + * } + * Rd.W[x] = res[x]; + * for RV32: x=0 + * for RV64: x=1...0 + * ~~~ + * + * \param [in] t long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMMAWB(long t, unsigned long a, unsigned long b) +{ + __ASM volatile("kmmawb %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.44.1. KMMAWB ===== */ + +/* ===== Inline Function Start for 3.44.2. KMMAWB.u ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC + * \brief KMMAWB.u (SIMD Saturating MSW Signed Multiply Word and Bottom Half and Add with Rounding) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KMMAWB Rd, Rs1, Rs2 + * KMMAWB.u Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the + * corresponding 32-bit elements of another register and add the most significant 32-bit results with + * the corresponding signed 32-bit elements of a third register. The addition result is written to the + * corresponding 32-bit elements of the third register. The `.u` form rounds up the multiplication + * results from the most significant discarded bit before the addition operations. + * + * **Description**:\n + * This instruction multiplies the signed 32-bit elements of Rs1 with the signed bottom 16-bit content + * of the corresponding 32-bit elements of Rs2 and adds the most significant 32-bit multiplication + * results with the corresponding signed 32-bit elements of Rd. If the addition result is beyond the Q31 + * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The results + * after saturation are written to the corresponding 32-bit elements of Rd. The `.u` form of the + * instruction rounds up the most significant 32-bit of the 48-bit multiplication results by adding a 1 to + * bit 15 of the result before the addition operations. + * + * **Operations**:\n + * ~~~ + * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[0]; + * if (`.u` form) { + * Round[x][32:0] = Mres[x][47:15] + 1; + * res[x] = Rd.W[x] + Round[x][32:1]; + * } else { + * res[x] = Rd.W[x] + Mres[x][47:16]; + * } + * if (res[x] > (2^31)-1) { + * res[x] = (2^31)-1; + * OV = 1; + * } else if (res[x] < -2^31) { + * res[x] = -2^31; + * OV = 1; + * } + * Rd.W[x] = res[x]; + * for RV32: x=0 + * for RV64: x=1...0 + * ~~~ + * + * \param [in] t long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMMAWB_U(long t, unsigned long a, unsigned long b) +{ + __ASM volatile("kmmawb.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.44.2. KMMAWB.u ===== */ + +/* ===== Inline Function Start for 3.45.1. KMMAWB2 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC + * \brief KMMAWB2 (SIMD Saturating MSW Signed Multiply Word and Bottom Half & 2 and Add) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KMMAWB2 Rd, Rs1, Rs2 + * KMMAWB2.u Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the signed 32-bit elements of one register and the bottom 16-bit of the + * corresponding 32-bit elements of another register, double the multiplication results and add the + * saturated most significant 32-bit results with the corresponding signed 32-bit elements of a third + * register. The saturated addition result is written to the corresponding 32-bit elements of the third + * register. The `.u` form rounds up the multiplication results from the most significant discarded bit + * before the addition operations. + * + * **Description**:\n + * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed bottom 16-bit Q15 + * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and + * adds the saturated most significant 32-bit Q31 multiplication results with the corresponding signed + * 32-bit elements of Rd. If the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is + * saturated to the range and the OV bit is set to 1. The results after saturation are written to the + * corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the most significant + * 32-bit of the 48-bit Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of + * the result before the addition operations. + * + * **Operations**:\n + * ~~~ + * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[0] == 0x8000)) { + * addop.W[x] = 0x7fffffff; + * OV = 1; + * } else { + * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[0]; + * if (`.u` form) { + * Mres[x][47:14] = Mres[x][47:14] + 1; + * } + * addop.W[x] = Mres[x][46:15]; // doubling + * } + * res[x] = Rd.W[x] + addop.W[x]; + * if (res[x] > (2^31)-1) { + * res[x] = (2^31)-1; + * OV = 1; + * } else if (res[x] < -2^31) { + * res[x] = -2^31; + * OV = 1; + * } + * Rd.W[x] = res[x]; + * for RV32: x=0 + * for RV64: x=1...0 + * ~~~ + * + * \param [in] t long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMMAWB2(long t, unsigned long a, unsigned long b) +{ + __ASM volatile("kmmawb2 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.45.1. KMMAWB2 ===== */ + +/* ===== Inline Function Start for 3.45.2. KMMAWB2.u ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC + * \brief KMMAWB2.u (SIMD Saturating MSW Signed Multiply Word and Bottom Half & 2 and Add with Rounding) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KMMAWB2 Rd, Rs1, Rs2 + * KMMAWB2.u Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the signed 32-bit elements of one register and the bottom 16-bit of the + * corresponding 32-bit elements of another register, double the multiplication results and add the + * saturated most significant 32-bit results with the corresponding signed 32-bit elements of a third + * register. The saturated addition result is written to the corresponding 32-bit elements of the third + * register. The `.u` form rounds up the multiplication results from the most significant discarded bit + * before the addition operations. + * + * **Description**:\n + * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed bottom 16-bit Q15 + * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and + * adds the saturated most significant 32-bit Q31 multiplication results with the corresponding signed + * 32-bit elements of Rd. If the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is + * saturated to the range and the OV bit is set to 1. The results after saturation are written to the + * corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the most significant + * 32-bit of the 48-bit Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of + * the result before the addition operations. + * + * **Operations**:\n + * ~~~ + * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[0] == 0x8000)) { + * addop.W[x] = 0x7fffffff; + * OV = 1; + * } else { + * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[0]; + * if (`.u` form) { + * Mres[x][47:14] = Mres[x][47:14] + 1; + * } + * addop.W[x] = Mres[x][46:15]; // doubling + * } + * res[x] = Rd.W[x] + addop.W[x]; + * if (res[x] > (2^31)-1) { + * res[x] = (2^31)-1; + * OV = 1; + * } else if (res[x] < -2^31) { + * res[x] = -2^31; + * OV = 1; + * } + * Rd.W[x] = res[x]; + * for RV32: x=0 + * for RV64: x=1...0 + * ~~~ + * + * \param [in] t long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMMAWB2_U(long t, unsigned long a, unsigned long b) +{ + __ASM volatile("kmmawb2.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.45.2. KMMAWB2.u ===== */ + +/* ===== Inline Function Start for 3.46.1. KMMAWT ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC + * \brief KMMAWT (SIMD Saturating MSW Signed Multiply Word and Top Half and Add) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KMMAWT Rd, Rs1, Rs2 + * KMMAWT.u Rd Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the signed 32-bit integer elements of one register and the signed top 16-bit of the + * corresponding 32-bit elements of another register and add the most significant 32-bit results with + * the corresponding signed 32-bit elements of a third register. The addition results are written to the + * corresponding 32-bit elements of the third register. The `.u` form rounds up the multiplication + * results from the most significant discarded bit before the addition operations. + * + * **Description**:\n + * This instruction multiplies the signed 32-bit elements of Rs1 with the signed top 16-bit of the + * corresponding 32-bit elements of Rs2 and adds the most significant 32-bit multiplication results + * with the corresponding signed 32-bit elements of Rd. If the addition result is beyond the Q31 + * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The results + * after saturation are written to the corresponding 32-bit elements of Rd. The `.u` form of the + * instruction rounds up the most significant 32-bit of the 48-bit multiplication results by adding a 1 to + * bit 15 of the result before the addition operations. + * + * **Operations**:\n + * ~~~ + * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[1]; + * if (`.u` form) { + * Round[x][32:0] = Mres[x][47:15] + 1; + * res[x] = Rd.W[x] + Round[x][32:1]; + * } else { + * res[x] = Rd.W[x] + Mres[x][47:16]; + * } + * if (res[x] > (2^31)-1) { + * res[x] = (2^31)-1; + * OV = 1; + * } else if (res[x] < -2^31) { + * res[x] = -2^31; + * OV = 1; + * } + * Rd.W[x] = res[x]; + * for RV32: x=0 + * for RV64: x=1...0 + * ~~~ + * + * \param [in] t long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMMAWT(long t, unsigned long a, unsigned long b) +{ + __ASM volatile("kmmawt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.46.1. KMMAWT ===== */ + +/* ===== Inline Function Start for 3.46.2. KMMAWT.u ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC + * \brief KMMAWT.u (SIMD Saturating MSW Signed Multiply Word and Top Half and Add with Rounding) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KMMAWT Rd, Rs1, Rs2 + * KMMAWT.u Rd Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the signed 32-bit integer elements of one register and the signed top 16-bit of the + * corresponding 32-bit elements of another register and add the most significant 32-bit results with + * the corresponding signed 32-bit elements of a third register. The addition results are written to the + * corresponding 32-bit elements of the third register. The `.u` form rounds up the multiplication + * results from the most significant discarded bit before the addition operations. + * + * **Description**:\n + * This instruction multiplies the signed 32-bit elements of Rs1 with the signed top 16-bit of the + * corresponding 32-bit elements of Rs2 and adds the most significant 32-bit multiplication results + * with the corresponding signed 32-bit elements of Rd. If the addition result is beyond the Q31 + * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The results + * after saturation are written to the corresponding 32-bit elements of Rd. The `.u` form of the + * instruction rounds up the most significant 32-bit of the 48-bit multiplication results by adding a 1 to + * bit 15 of the result before the addition operations. + * + * **Operations**:\n + * ~~~ + * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[1]; + * if (`.u` form) { + * Round[x][32:0] = Mres[x][47:15] + 1; + * res[x] = Rd.W[x] + Round[x][32:1]; + * } else { + * res[x] = Rd.W[x] + Mres[x][47:16]; + * } + * if (res[x] > (2^31)-1) { + * res[x] = (2^31)-1; + * OV = 1; + * } else if (res[x] < -2^31) { + * res[x] = -2^31; + * OV = 1; + * } + * Rd.W[x] = res[x]; + * for RV32: x=0 + * for RV64: x=1...0 + * ~~~ + * + * \param [in] t long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMMAWT_U(long t, unsigned long a, unsigned long b) +{ + __ASM volatile("kmmawt.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.46.2. KMMAWT.u ===== */ + +/* ===== Inline Function Start for 3.47.1. KMMAWT2 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC + * \brief KMMAWT2 (SIMD Saturating MSW Signed Multiply Word and Top Half & 2 and Add) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KMMAWT2 Rd, Rs1, Rs2 + * KMMAWT2.u Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the signed 32-bit elements of one register and the top 16-bit of the + * corresponding 32-bit elements of another register, double the multiplication results and add the + * saturated most significant 32-bit results with the corresponding signed 32-bit elements of a third + * register. The saturated addition result is written to the corresponding 32-bit elements of the third + * register. The `.u` form rounds up the multiplication results from the most significant discarded bit + * before the addition operations. + * + * **Description**:\n + * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed top 16-bit Q15 + * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and + * adds the saturated most significant 32-bit Q31 multiplication results with the corresponding signed + * 32-bit elements of Rd. If the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is + * saturated to the range and the OV bit is set to 1. The results after saturation are written to the + * corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the most significant + * 32-bit of the 48-bit Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of + * the result before the addition operations. + * + * **Operations**:\n + * ~~~ + * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[1] == 0x8000)) { + * addop.W[x] = 0x7fffffff; + * OV = 1; + * } else { + * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[1]; + * if (`.u` form) { + * Mres[x][47:14] = Mres[x][47:14] + 1; + * } + * addop.W[x] = Mres[x][46:15]; // doubling + * } + * res[x] = Rd.W[x] + addop.W[x]; + * if (res[x] > (2^31)-1) { + * res[x] = (2^31)-1; + * OV = 1; + * } else if (res[x] < -2^31) { + * res[x] = -2^31; + * OV = 1; + * } + * Rd.W[x] = res[x]; + * for RV32: x=0 + * for RV64: x=1...0 + * ~~~ + * + * \param [in] t long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMMAWT2(long t, unsigned long a, unsigned long b) +{ + __ASM volatile("kmmawt2 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.47.1. KMMAWT2 ===== */ + +/* ===== Inline Function Start for 3.47.2. KMMAWT2.u ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC + * \brief KMMAWT2.u (SIMD Saturating MSW Signed Multiply Word and Top Half & 2 and Add with Rounding) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KMMAWT2 Rd, Rs1, Rs2 + * KMMAWT2.u Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the signed 32-bit elements of one register and the top 16-bit of the + * corresponding 32-bit elements of another register, double the multiplication results and add the + * saturated most significant 32-bit results with the corresponding signed 32-bit elements of a third + * register. The saturated addition result is written to the corresponding 32-bit elements of the third + * register. The `.u` form rounds up the multiplication results from the most significant discarded bit + * before the addition operations. + * + * **Description**:\n + * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed top 16-bit Q15 + * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and + * adds the saturated most significant 32-bit Q31 multiplication results with the corresponding signed + * 32-bit elements of Rd. If the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is + * saturated to the range and the OV bit is set to 1. The results after saturation are written to the + * corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the most significant + * 32-bit of the 48-bit Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of + * the result before the addition operations. + * + * **Operations**:\n + * ~~~ + * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[1] == 0x8000)) { + * addop.W[x] = 0x7fffffff; + * OV = 1; + * } else { + * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[1]; + * if (`.u` form) { + * Mres[x][47:14] = Mres[x][47:14] + 1; + * } + * addop.W[x] = Mres[x][46:15]; // doubling + * } + * res[x] = Rd.W[x] + addop.W[x]; + * if (res[x] > (2^31)-1) { + * res[x] = (2^31)-1; + * OV = 1; + * } else if (res[x] < -2^31) { + * res[x] = -2^31; + * OV = 1; + * } + * Rd.W[x] = res[x]; + * for RV32: x=0 + * for RV64: x=1...0 + * ~~~ + * + * \param [in] t long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMMAWT2_U(long t, unsigned long a, unsigned long b) +{ + __ASM volatile("kmmawt2.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.47.2. KMMAWT2.u ===== */ + +/* ===== Inline Function Start for 3.48.1. KMMSB ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC + * \brief KMMSB (SIMD Saturating MSW Signed Multiply Word and Subtract) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KMMSB Rd, Rs1, Rs2 + * KMMSB.u Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the signed 32-bit integer elements of two registers and subtract the most + * significant 32-bit results from the signed 32-bit elements of a third register. The subtraction results + * are written to the third register. The `.u` form performs an additional rounding up operation on + * the multiplication results before subtracting the most significant 32-bit part of the results. + * + * **Description**:\n + * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2 + * and subtracts the most significant 32-bit multiplication results from the signed 32-bit elements of + * Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the + * range and the OV bit is set to 1. The results after saturation are written to Rd. The `.u` form of the + * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by + * adding a 1 to bit 31 of the results. + * + * **Operations**:\n + * ~~~ + * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x]; + * if (`.u` form) { + * Round[x][32:0] = Mres[x][63:31] + 1; + * res[x] = Rd.W[x] - Round[x][32:1]; + * } else { + * res[x] = Rd.W[x] - Mres[x][63:32]; + * } + * if (res[x] > (2^31)-1) { + * res[x] = (2^31)-1; + * OV = 1; + * } else if (res[x] < -2^31) { + * res[x] = -2^31; + * OV = 1; + * } + * Rd.W[x] = res[x]; + * for RV32: x=0 + * for RV64: x=1...0 + * ~~~ + * + * \param [in] t long type of value stored in t + * \param [in] a long type of value stored in a + * \param [in] b long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMMSB(long t, long a, long b) +{ + __ASM volatile("kmmsb %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.48.1. KMMSB ===== */ + +/* ===== Inline Function Start for 3.48.2. KMMSB.u ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC + * \brief KMMSB.u (SIMD Saturating MSW Signed Multiply Word and Subtraction with Rounding) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KMMSB Rd, Rs1, Rs2 + * KMMSB.u Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the signed 32-bit integer elements of two registers and subtract the most + * significant 32-bit results from the signed 32-bit elements of a third register. The subtraction results + * are written to the third register. The `.u` form performs an additional rounding up operation on + * the multiplication results before subtracting the most significant 32-bit part of the results. + * + * **Description**:\n + * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2 + * and subtracts the most significant 32-bit multiplication results from the signed 32-bit elements of + * Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the + * range and the OV bit is set to 1. The results after saturation are written to Rd. The `.u` form of the + * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by + * adding a 1 to bit 31 of the results. + * + * **Operations**:\n + * ~~~ + * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x]; + * if (`.u` form) { + * Round[x][32:0] = Mres[x][63:31] + 1; + * res[x] = Rd.W[x] - Round[x][32:1]; + * } else { + * res[x] = Rd.W[x] - Mres[x][63:32]; + * } + * if (res[x] > (2^31)-1) { + * res[x] = (2^31)-1; + * OV = 1; + * } else if (res[x] < -2^31) { + * res[x] = -2^31; + * OV = 1; + * } + * Rd.W[x] = res[x]; + * for RV32: x=0 + * for RV64: x=1...0 + * ~~~ + * + * \param [in] t long type of value stored in t + * \param [in] a long type of value stored in a + * \param [in] b long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMMSB_U(long t, long a, long b) +{ + __ASM volatile("kmmsb.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.48.2. KMMSB.u ===== */ + +/* ===== Inline Function Start for 3.49.1. KMMWB2 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC + * \brief KMMWB2 (SIMD Saturating MSW Signed Multiply Word and Bottom Half & 2) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KMMWB2 Rd, Rs1, Rs2 + * KMMWB2.u Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the + * corresponding 32-bit elements of another register, double the multiplication results and write the + * saturated most significant 32-bit results to the corresponding 32-bit elements of a register. The `.u` + * form rounds up the results from the most significant discarded bit. + * + * **Description**:\n + * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed bottom 16-bit Q15 + * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and + * writes the saturated most significant 32-bit Q31 multiplication results to the corresponding 32-bit + * elements of Rd. The `.u` form of the instruction rounds up the most significant 32-bit of the 48-bit + * Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of the results. + * + * **Operations**:\n + * ~~~ + * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[0] == 0x8000)) { + * Rd.W[x] = 0x7fffffff; + * OV = 1; + * } else { + * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[0]; + * if (`.u` form) { + * Round[x][32:0] = Mres[x][46:14] + 1; + * Rd.W[x] = Round[x][32:1]; + * } else { + * Rd.W[x] = Mres[x][46:15]; + * } + * } + * for RV32: x=0 + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMMWB2(long a, unsigned long b) +{ + register long result; + __ASM volatile("kmmwb2 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.49.1. KMMWB2 ===== */ + +/* ===== Inline Function Start for 3.49.2. KMMWB2.u ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC + * \brief KMMWB2.u (SIMD Saturating MSW Signed Multiply Word and Bottom Half & 2 with Rounding) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KMMWB2 Rd, Rs1, Rs2 + * KMMWB2.u Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the + * corresponding 32-bit elements of another register, double the multiplication results and write the + * saturated most significant 32-bit results to the corresponding 32-bit elements of a register. The `.u` + * form rounds up the results from the most significant discarded bit. + * + * **Description**:\n + * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed bottom 16-bit Q15 + * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and + * writes the saturated most significant 32-bit Q31 multiplication results to the corresponding 32-bit + * elements of Rd. The `.u` form of the instruction rounds up the most significant 32-bit of the 48-bit + * Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of the results. + * + * **Operations**:\n + * ~~~ + * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[0] == 0x8000)) { + * Rd.W[x] = 0x7fffffff; + * OV = 1; + * } else { + * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[0]; + * if (`.u` form) { + * Round[x][32:0] = Mres[x][46:14] + 1; + * Rd.W[x] = Round[x][32:1]; + * } else { + * Rd.W[x] = Mres[x][46:15]; + * } + * } + * for RV32: x=0 + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMMWB2_U(long a, unsigned long b) +{ + register long result; + __ASM volatile("kmmwb2.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.49.2. KMMWB2.u ===== */ + +/* ===== Inline Function Start for 3.50.1. KMMWT2 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC + * \brief KMMWT2 (SIMD Saturating MSW Signed Multiply Word and Top Half & 2) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KMMWT2 Rd, Rs1, Rs2 + * KMMWT2.u Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the signed 32-bit integer elements of one register and the top 16-bit of the + * corresponding 32-bit elements of another register, double the multiplication results and write the + * saturated most significant 32-bit results to the corresponding 32-bit elements of a register. The `.u` + * form rounds up the results from the most significant discarded bit. + * + * **Description**:\n + * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed top 16-bit Q15 + * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and + * writes the saturated most significant 32-bit Q31 multiplication results to the corresponding 32-bit + * elements of Rd. The `.u` form of the instruction rounds up the most significant 32-bit of the 48-bit + * Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of the results. + * + * **Operations**:\n + * ~~~ + * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[1] == 0x8000)) { + * Rd.W[x] = 0x7fffffff; + * OV = 1; + * } else { + * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[1]; + * if (`.u` form) { + * Round[x][32:0] = Mres[x][46:14] + 1; + * Rd.W[x] = Round[x][32:1]; + * } else { + * Rd.W[x] = Mres[x][46:15]; + * } + * } + * for RV32: x=0 + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMMWT2(long a, unsigned long b) +{ + register long result; + __ASM volatile("kmmwt2 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.50.1. KMMWT2 ===== */ + +/* ===== Inline Function Start for 3.50.2. KMMWT2.u ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC + * \brief KMMWT2.u (SIMD Saturating MSW Signed Multiply Word and Top Half & 2 with Rounding) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KMMWT2 Rd, Rs1, Rs2 + * KMMWT2.u Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the signed 32-bit integer elements of one register and the top 16-bit of the + * corresponding 32-bit elements of another register, double the multiplication results and write the + * saturated most significant 32-bit results to the corresponding 32-bit elements of a register. The `.u` + * form rounds up the results from the most significant discarded bit. + * + * **Description**:\n + * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed top 16-bit Q15 + * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and + * writes the saturated most significant 32-bit Q31 multiplication results to the corresponding 32-bit + * elements of Rd. The `.u` form of the instruction rounds up the most significant 32-bit of the 48-bit + * Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of the results. + * + * **Operations**:\n + * ~~~ + * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[1] == 0x8000)) { + * Rd.W[x] = 0x7fffffff; + * OV = 1; + * } else { + * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[1]; + * if (`.u` form) { + * Round[x][32:0] = Mres[x][46:14] + 1; + * Rd.W[x] = Round[x][32:1]; + * } else { + * Rd.W[x] = Mres[x][46:15]; + * } + * } + * for RV32: x=0 + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMMWT2_U(long a, unsigned long b) +{ + register long result; + __ASM volatile("kmmwt2.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.50.2. KMMWT2.u ===== */ + +/* ===== Inline Function Start for 3.51.1. KMSDA ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB + * \brief KMSDA (SIMD Saturating Signed Multiply Two Halfs & Add & Subtract) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KMSDA Rd, Rs1, Rs2 + * KMSXDA Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then + * subtracts the two 32-bit results from the corresponding 32-bit elements of a third register. The + * subtraction result may be saturated. + * * KMSDA: rd.W[x] - top*top - bottom*bottom (per 32-bit element) + * * KMSXDA: rd.W[x] - top*bottom - bottom*top (per 32-bit element) + * + * **Description**:\n + * For the `KMSDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 + * with the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of + * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2. + * For the `KMSXDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 + * with the top 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of the + * 32-bit elements of Rs1 with the bottom 16-bit content of the 32-bit elements of Rs2. + * The two 32-bit multiplication results are then subtracted from the content of the corresponding 32- + * bit elements of Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is + * saturated to the range and the OV bit is set to 1. The results after saturation are written to Rd. The + * 16-bit contents are treated as signed integers. + * + * **Operations**:\n + * ~~~ + * // KMSDA + * res[x] = Rd.W[x] - (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]); + * // KMSXDA + * res[x] = Rd.W[x] - (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]); + * if (res[x] > (2^31)-1) { + * res[x] = (2^31)-1; + * OV = 1; + * } else if (res[x] < -2^31) { + * res[x] = -2^31; + * OV = 1; + * } + * Rd.W[x] = res[x]; + * for RV32: x=0 + * for RV64: x=1...0 + * ~~~ + * + * \param [in] t long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMSDA(long t, unsigned long a, unsigned long b) +{ + __ASM volatile("kmsda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.51.1. KMSDA ===== */ + +/* ===== Inline Function Start for 3.51.2. KMSXDA ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB + * \brief KMSXDA (SIMD Saturating Signed Crossed Multiply Two Halfs & Add & Subtract) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KMSDA Rd, Rs1, Rs2 + * KMSXDA Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then + * subtracts the two 32-bit results from the corresponding 32-bit elements of a third register. The + * subtraction result may be saturated. + * * KMSDA: rd.W[x] - top*top - bottom*bottom (per 32-bit element) + * * KMSXDA: rd.W[x] - top*bottom - bottom*top (per 32-bit element) + * + * **Description**:\n + * For the `KMSDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 + * with the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of + * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2. + * For the `KMSXDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 + * with the top 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of the + * 32-bit elements of Rs1 with the bottom 16-bit content of the 32-bit elements of Rs2. + * The two 32-bit multiplication results are then subtracted from the content of the corresponding 32- + * bit elements of Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is + * saturated to the range and the OV bit is set to 1. The results after saturation are written to Rd. The + * 16-bit contents are treated as signed integers. + * + * **Operations**:\n + * ~~~ + * // KMSDA + * res[x] = Rd.W[x] - (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]); + * // KMSXDA + * res[x] = Rd.W[x] - (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]); + * if (res[x] > (2^31)-1) { + * res[x] = (2^31)-1; + * OV = 1; + * } else if (res[x] < -2^31) { + * res[x] = -2^31; + * OV = 1; + * } + * Rd.W[x] = res[x]; + * for RV32: x=0 + * for RV64: x=1...0 + * ~~~ + * + * \param [in] t long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMSXDA(long t, unsigned long a, unsigned long b) +{ + __ASM volatile("kmsxda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.51.2. KMSXDA ===== */ + +/* ===== Inline Function Start for 3.52. KMSR64 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB + * \brief KMSR64 (Signed Multiply and Saturating Subtract from 64-Bit Data) + * \details + * **Type**: DSP (64-bit Profile) + * + * **Syntax**:\n + * ~~~ + * KMSR64 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the 32-bit signed elements in two registers and subtract the 64-bit multiplication + * results from the 64-bit signed data of a pair of registers (RV32) or a register (RV64). The result is + * saturated to the Q63 range and written back to the pair of registers (RV32) or the register (RV64). + * + * **RV32 Description**:\n + * This instruction multiplies the 32-bit signed data of Rs1 with that of Rs2. It + * subtracts the 64-bit multiplication result from the 64-bit signed data of an even/odd pair of registers + * specified by Rd(4,1) with unlimited precision. If the 64-bit subtraction result is beyond the Q63 + * number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The saturated + * result is written back to the even/odd pair of registers specified by Rd(4,1). + * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair + * includes register 2d and 2d+1. + * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register + * of the pair contains the low 32-bit of the result. + * + * **RV64 Description**:\n + * This instruction multiplies the 32-bit signed elements of Rs1 with that of Rs2. It + * subtracts the 64-bit multiplication results from the 64-bit signed data in Rd with unlimited + * precision. If the 64-bit subtraction result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is + * saturated to the range and the OV bit is set to 1. The saturated result is written back to Rd. + * + * **Operations**:\n + * ~~~ + * RV32: + * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1); + * result = R[t_H].R[t_L] - (Rs1 * Rs2); + * if (result > (2^63)-1) { + * result = (2^63)-1; OV = 1; + * } else if (result < -2^63) { + * result = -2^63; OV = 1; + * } + * R[t_H].R[t_L] = result; + * RV64: + * // `result` has unlimited precision + * result = Rd - (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); + * if (result > (2^63)-1) { + * result = (2^63)-1; OV = 1; + * } else if (result < -2^63) { + * result = -2^63; OV = 1; + * } + * Rd = result; + * ~~~ + * + * \param [in] t long long type of value stored in t + * \param [in] a long type of value stored in a + * \param [in] b long type of value stored in b + * \return value stored in long long type + */ +__STATIC_FORCEINLINE long long __RV_KMSR64(long long t, long a, long b) +{ + __ASM volatile("kmsr64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.52. KMSR64 ===== */ + +/* ===== Inline Function Start for 3.53. KSLLW ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU + * \brief KSLLW (Saturating Shift Left Logical for Word) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * KSLLW Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do logical left shift operation with saturation on a 32-bit word. The shift amount is a + * variable from a GPR. + * + * **Description**:\n + * The first word data in Rs1 is left-shifted logically. The shifted out bits are filled with + * zero and the shift amount is specified by the low-order 5-bits of the value in the Rs2 register. Any + * shifted value greater than 2^31-1 is saturated to 2^31-1. Any shifted value smaller than -2^31 is saturated + * to -2^31. And the saturated result is sign-extended and written to Rd. If any saturation is performed, + * set OV bit to 1. + * + * **Operations**:\n + * ~~~ + * sa = Rs2[4:0]; + * res[(31+sa):0] = Rs1.W[0] << sa; + * if (res > (2^31)-1) { + * res = 0x7fffffff; OV = 1; + * } else if (res < -2^31) { + * res = 0x80000000; OV = 1; + * } + * Rd[31:0] = res[31:0]; // RV32 + * Rd[63:0] = SE(res[31:0]); // RV64 + * ~~~ + * + * \param [in] a long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KSLLW(long a, unsigned int b) +{ + register long result; + __ASM volatile("ksllw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.53. KSLLW ===== */ + +/* ===== Inline Function Start for 3.54. KSLLIW ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU + * \brief KSLLIW (Saturating Shift Left Logical Immediate for Word) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * KSLLIW Rd, Rs1, imm5u + * ~~~ + * + * **Purpose**:\n + * Do logical left shift operation with saturation on a 32-bit word. The shift amount is an + * immediate value. + * + * **Description**:\n + * The first word data in Rs1 is left-shifted logically. The shifted out bits are filled with + * zero and the shift amount is specified by the imm5u constant. Any shifted value greater than 2^31-1 is + * saturated to 2^31-1. Any shifted value smaller than -2^31 is saturated to -2^31. And the saturated result is + * sign-extended and written to Rd. If any saturation is performed, set OV bit to 1. + * + * **Operations**:\n + * ~~~ + * sa = imm5u; + * res[(31+sa):0] = Rs1.W[0] << sa; + * if (res > (2^31)-1) { + * res = 0x7fffffff; OV = 1; + * } else if (res < -2^31) { + * res = 0x80000000; OV = 1; + * } + * Rd[31:0] = res[31:0]; // RV32 + * Rd[63:0] = SE(res[31:0]); // RV64 + * ~~~ + * + * \param [in] a long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in long type + */ +#define __RV_KSLLIW(a, b) \ + ({ \ + register long result; \ + register long __a = (long)(a); \ + __ASM volatile("kslliw %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ + result; \ + }) +/* ===== Inline Function End for 3.54. KSLLIW ===== */ + +/* ===== Inline Function Start for 3.55. KSLL8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT + * \brief KSLL8 (SIMD 8-bit Saturating Shift Left Logical) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KSLL8 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 8-bit elements logical left shift operations with saturation simultaneously. The shift + * amount is a variable from a GPR. + * + * **Description**:\n + * The 8-bit data elements in Rs1 are left-shifted logically. The shifted out bits are filled + * with zero and the shift amount is specified by the low-order 3-bits of the value in the Rs2 register. + * Any shifted value greater than 2^7-1 is saturated to 2^7-1. Any shifted value smaller than -2^7 is + * saturated to -2^7. And the saturated results are written to Rd. If any saturation is performed, set OV + * bit to 1. + * + * **Operations**:\n + * ~~~ + * sa = Rs2[2:0]; + * if (sa != 0) { + * res[(7+sa):0] = Rs1.B[x] << sa; + * if (res > (2^7)-1) { + * res = 0x7f; OV = 1; + * } else if (res < -2^7) { + * res = 0x80; OV = 1; + * } + * Rd.B[x] = res[7:0]; + * } else { + * Rd = Rs1; + * } + * for RV32: x=3...0, + * for RV64: x=7...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KSLL8(unsigned long a, unsigned int b) +{ + register unsigned long result; + __ASM volatile("ksll8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.55. KSLL8 ===== */ + +/* ===== Inline Function Start for 3.56. KSLLI8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT + * \brief KSLLI8 (SIMD 8-bit Saturating Shift Left Logical Immediate) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KSLLI8 Rd, Rs1, imm3u + * ~~~ + * + * **Purpose**:\n + * Do 8-bit elements logical left shift operations with saturation simultaneously. The shift + * amount is an immediate value. + * + * **Description**:\n + * The 8-bit data elements in Rs1 are left-shifted logically. The shifted out bits are filled + * with zero and the shift amount is specified by the imm3u constant. Any shifted value greater than + * 2^7-1 is saturated to 2^7-1. Any shifted value smaller than -2^7 is saturated to -2^7. And the saturated + * results are written to Rd. If any saturation is performed, set OV bit to 1. + * + * **Operations**:\n + * ~~~ + * sa = imm3u[2:0]; + * if (sa != 0) { + * res[(7+sa):0] = Rs1.B[x] << sa; + * if (res > (2^7)-1) { + * res = 0x7f; OV = 1; + * } else if (res < -2^7) { + * res = 0x80; OV = 1; + * } + * Rd.B[x] = res[7:0]; + * } else { + * Rd = Rs1; + * } + * for RV32: x=3...0, + * for RV64: x=7...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +#define __RV_KSLLI8(a, b) \ + ({ \ + register unsigned long result; \ + register unsigned long __a = (unsigned long)(a); \ + __ASM volatile("kslli8 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ + result; \ + }) +/* ===== Inline Function End for 3.56. KSLLI8 ===== */ + +/* ===== Inline Function Start for 3.57. KSLL16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT + * \brief KSLL16 (SIMD 16-bit Saturating Shift Left Logical) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KSLL16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 16-bit elements logical left shift operations with saturation simultaneously. The shift + * amount is a variable from a GPR. + * + * **Description**:\n + * The 16-bit data elements in Rs1 are left-shifted logically. The shifted out bits are filled + * with zero and the shift amount is specified by the low-order 4-bits of the value in the Rs2 register. + * Any shifted value greater than 2^15-1 is saturated to 2^15-1. Any shifted value smaller than -2^15 is + * saturated to -2^15. And the saturated results are written to Rd. If any saturation is performed, set OV + * bit to 1. + * + * **Operations**:\n + * ~~~ + * sa = Rs2[3:0]; + * if (sa != 0) { + * res[(15+sa):0] = Rs1.H[x] << sa; + * if (res > (2^15)-1) { + * res = 0x7fff; OV = 1; + * } else if (res < -2^15) { + * res = 0x8000; OV = 1; + * } + * Rd.H[x] = res[15:0]; + * } else { + * Rd = Rs1; + * } + * for RV32: x=1...0, + * for RV64: x=3...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KSLL16(unsigned long a, unsigned int b) +{ + register unsigned long result; + __ASM volatile("ksll16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.57. KSLL16 ===== */ + +/* ===== Inline Function Start for 3.58. KSLLI16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT + * \brief KSLLI16 (SIMD 16-bit Saturating Shift Left Logical Immediate) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KSLLI16 Rd, Rs1, imm4u + * ~~~ + * + * **Purpose**:\n + * Do 16-bit elements logical left shift operations with saturation simultaneously. The shift + * amount is an immediate value. + * + * **Description**:\n + * The 16-bit data elements in Rs1 are left-shifted logically. The shifted out bits are filled + * with zero and the shift amount is specified by the imm4u constant. Any shifted value greater than + * 2^15-1 is saturated to 2^15-1. Any shifted value smaller than -2^15 is saturated to -2^15. And the saturated + * results are written to Rd. If any saturation is performed, set OV bit to 1. + * + * **Operations**:\n + * ~~~ + * sa = imm4u[3:0]; + * if (sa != 0) { + * res[(15+sa):0] = Rs1.H[x] << sa; + * if (res > (2^15)-1) { + * res = 0x7fff; OV = 1; + * } else if (res < -2^15) { + * res = 0x8000; OV = 1; + * } + * Rd.H[x] = res[15:0]; + * } else { + * Rd = Rs1; + * } + * for RV32: x=1...0, + * for RV64: x=3...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +#define __RV_KSLLI16(a, b) \ + ({ \ + register unsigned long result; \ + register unsigned long __a = (unsigned long)(a); \ + __ASM volatile("kslli16 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ + result; \ + }) +/* ===== Inline Function End for 3.58. KSLLI16 ===== */ + +/* ===== Inline Function Start for 3.59.1. KSLRA8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT + * \brief KSLRA8 (SIMD 8-bit Shift Left Logical with Saturation or Shift Right Arithmetic) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KSLRA8 Rd, Rs1, Rs2 + * KSLRA8.u Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 8-bit elements logical left (positive) or arithmetic right (negative) shift operation with + * Q7 saturation for the left shift. The `.u` form performs additional rounding up operations for the + * right shift. + * + * **Description**:\n + * The 8-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically + * based on the value of Rs2[3:0]. Rs2[3:0] is in the signed range of [-2^3, 2^3-1]. A positive Rs2[3:0] means + * logical left shift and a negative Rs2[3:0] means arithmetic right shift. The shift amount is the + * absolute value of Rs2[3:0]. However, the behavior of `Rs2[3:0]==-2^3 (0x8)` is defined to be + * equivalent to the behavior of `Rs2[3:0]==-(2^3-1) (0x9)`. + * The left-shifted results are saturated to the 8-bit signed integer range of [-2^7, 2^7-1]. For the `.u` form + * of the instruction, the right-shifted results are added a 1 to the most significant discarded bit + * position for rounding effect. After the shift, saturation, or rounding, the final results are written to + * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:4] will not affect + * this instruction. + * + * **Operations**:\n + * ~~~ + * if (Rs2[3:0] < 0) { + * sa = -Rs2[3:0]; + * sa = (sa == 8)? 7 : sa; + * if (`.u` form) { + * res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1; + * Rd.B[x] = res[7:0]; + * } else { + * Rd.B[x] = SE8(Rs1.B[x][7:sa]); + * } + * } else { + * sa = Rs2[2:0]; + * res[(7+sa):0] = Rs1.B[x] <<(logic) sa; + * if (res > (2^7)-1) { + * res[7:0] = 0x7f; OV = 1; + * } else if (res < -2^7) { + * res[7:0] = 0x80; OV = 1; + * } + * Rd.B[x] = res[7:0]; + * } + * for RV32: x=3...0, + * for RV64: x=7...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b int type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KSLRA8(unsigned long a, int b) +{ + register unsigned long result; + __ASM volatile("kslra8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.59.1. KSLRA8 ===== */ + +/* ===== Inline Function Start for 3.59.2. KSLRA8.u ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT + * \brief KSLRA8.u (SIMD 8-bit Shift Left Logical with Saturation or Rounding Shift Right Arithmetic) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KSLRA8 Rd, Rs1, Rs2 + * KSLRA8.u Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 8-bit elements logical left (positive) or arithmetic right (negative) shift operation with + * Q7 saturation for the left shift. The `.u` form performs additional rounding up operations for the + * right shift. + * + * **Description**:\n + * The 8-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically + * based on the value of Rs2[3:0]. Rs2[3:0] is in the signed range of [-2^3, 2^3-1]. A positive Rs2[3:0] means + * logical left shift and a negative Rs2[3:0] means arithmetic right shift. The shift amount is the + * absolute value of Rs2[3:0]. However, the behavior of `Rs2[3:0]==-2^3 (0x8)` is defined to be + * equivalent to the behavior of `Rs2[3:0]==-(2^3-1) (0x9)`. + * The left-shifted results are saturated to the 8-bit signed integer range of [-2^7, 2^7-1]. For the `.u` form + * of the instruction, the right-shifted results are added a 1 to the most significant discarded bit + * position for rounding effect. After the shift, saturation, or rounding, the final results are written to + * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:4] will not affect + * this instruction. + * + * **Operations**:\n + * ~~~ + * if (Rs2[3:0] < 0) { + * sa = -Rs2[3:0]; + * sa = (sa == 8)? 7 : sa; + * if (`.u` form) { + * res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1; + * Rd.B[x] = res[7:0]; + * } else { + * Rd.B[x] = SE8(Rs1.B[x][7:sa]); + * } + * } else { + * sa = Rs2[2:0]; + * res[(7+sa):0] = Rs1.B[x] <<(logic) sa; + * if (res > (2^7)-1) { + * res[7:0] = 0x7f; OV = 1; + * } else if (res < -2^7) { + * res[7:0] = 0x80; OV = 1; + * } + * Rd.B[x] = res[7:0]; + * } + * for RV32: x=3...0, + * for RV64: x=7...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b int type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KSLRA8_U(unsigned long a, int b) +{ + register unsigned long result; + __ASM volatile("kslra8.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.59.2. KSLRA8.u ===== */ + +/* ===== Inline Function Start for 3.60.1. KSLRA16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT + * \brief KSLRA16 (SIMD 16-bit Shift Left Logical with Saturation or Shift Right Arithmetic) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KSLRA16 Rd, Rs1, Rs2 + * KSLRA16.u Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 16-bit elements logical left (positive) or arithmetic right (negative) shift operation with + * Q15 saturation for the left shift. The `.u` form performs additional rounding up operations for the + * right shift. + * + * **Description**:\n + * The 16-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically + * based on the value of Rs2[4:0]. Rs2[4:0] is in the signed range of [-2^4, 2^4-1]. A positive Rs2[4:0] means + * logical left shift and a negative Rs2[4:0] means arithmetic right shift. The shift amount is the + * absolute value of Rs2[4:0]. However, the behavior of `Rs2[4:0]==-2^4 (0x10)` is defined to be + * equivalent to the behavior of `Rs2[4:0]==-(2^4-1) (0x11)`. + * The left-shifted results are saturated to the 16-bit signed integer range of [-2^15, 2^15-1]. For the `.u` + * form of the instruction, the right-shifted results are added a 1 to the most significant discarded bit + * position for rounding effect. After the shift, saturation, or rounding, the final results are written to + * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:5] will not affect + * this instruction. + * + * **Operations**:\n + * ~~~ + * if (Rs2[4:0] < 0) { + * sa = -Rs2[4:0]; + * sa = (sa == 16)? 15 : sa; + * if (`.u` form) { + * res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1; + * Rd.H[x] = res[15:0]; + * } else { + * Rd.H[x] = SE16(Rs1.H[x][15:sa]); + * } + * } else { + * sa = Rs2[3:0]; + * res[(15+sa):0] = Rs1.H[x] <<(logic) sa; + * if (res > (2^15)-1) { + * res[15:0] = 0x7fff; OV = 1; + * } else if (res < -2^15) { + * res[15:0] = 0x8000; OV = 1; + * } + * d.H[x] = res[15:0]; + * } + * for RV32: x=1...0, + * for RV64: x=3...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b int type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KSLRA16(unsigned long a, int b) +{ + register unsigned long result; + __ASM volatile("kslra16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.60.1. KSLRA16 ===== */ + +/* ===== Inline Function Start for 3.60.2. KSLRA16.u ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT + * \brief KSLRA16.u (SIMD 16-bit Shift Left Logical with Saturation or Rounding Shift Right Arithmetic) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KSLRA16 Rd, Rs1, Rs2 + * KSLRA16.u Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 16-bit elements logical left (positive) or arithmetic right (negative) shift operation with + * Q15 saturation for the left shift. The `.u` form performs additional rounding up operations for the + * right shift. + * + * **Description**:\n + * The 16-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically + * based on the value of Rs2[4:0]. Rs2[4:0] is in the signed range of [-2^4, 2^4-1]. A positive Rs2[4:0] means + * logical left shift and a negative Rs2[4:0] means arithmetic right shift. The shift amount is the + * absolute value of Rs2[4:0]. However, the behavior of `Rs2[4:0]==-2^4 (0x10)` is defined to be + * equivalent to the behavior of `Rs2[4:0]==-(2^4-1) (0x11)`. + * The left-shifted results are saturated to the 16-bit signed integer range of [-2^15, 2^15-1]. For the `.u` + * form of the instruction, the right-shifted results are added a 1 to the most significant discarded bit + * position for rounding effect. After the shift, saturation, or rounding, the final results are written to + * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:5] will not affect + * this instruction. + * + * **Operations**:\n + * ~~~ + * if (Rs2[4:0] < 0) { + * sa = -Rs2[4:0]; + * sa = (sa == 16)? 15 : sa; + * if (`.u` form) { + * res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1; + * Rd.H[x] = res[15:0]; + * } else { + * Rd.H[x] = SE16(Rs1.H[x][15:sa]); + * } + * } else { + * sa = Rs2[3:0]; + * res[(15+sa):0] = Rs1.H[x] <<(logic) sa; + * if (res > (2^15)-1) { + * res[15:0] = 0x7fff; OV = 1; + * } else if (res < -2^15) { + * res[15:0] = 0x8000; OV = 1; + * } + * d.H[x] = res[15:0]; + * } + * for RV32: x=1...0, + * for RV64: x=3...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b int type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KSLRA16_U(unsigned long a, int b) +{ + register unsigned long result; + __ASM volatile("kslra16.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.60.2. KSLRA16.u ===== */ + +/* ===== Inline Function Start for 3.61. KSLRAW ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU + * \brief KSLRAW (Shift Left Logical with Q31 Saturation or Shift Right Arithmetic) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * KSLRAW Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Perform a logical left (positive) or arithmetic right (negative) shift operation with Q31 + * saturation for the left shift on a 32-bit data. + * + * **Description**:\n + * The lower 32-bit content of Rs1 is left-shifted logically or right-shifted arithmetically + * based on the value of Rs2[5:0]. Rs2[5:0] is in the signed range of [-25, 25-1]. A positive Rs2[5:0] means + * logical left shift and a negative Rs2[5:0] means arithmetic right shift. The shift amount is the + * absolute value of Rs2[5:0] clamped to the actual shift range of [0, 31]. + * The left-shifted result is saturated to the 32-bit signed integer range of [-2^31, 2^31-1]. After the shift + * operation, the final result is bit-31 sign-extended and written to Rd. If any saturation happens, this + * instruction sets the OV flag. The value of Rs2[31:6] will not affected the operation of this instruction. + * + * **Operations**:\n + * ~~~ + * if (Rs2[5:0] < 0) { + * sa = -Rs2[5:0]; + * sa = (sa == 32)? 31 : sa; + * res[31:0] = Rs1.W[0] >>(arith) sa; + * } else { + * sa = Rs2[5:0]; + * tmp = Rs1.W[0] <<(logic) sa; + * if (tmp > (2^31)-1) { + * res[31:0] = (2^31)-1; + * OV = 1; + * } else if (tmp < -2^31) { + * res[31:0] = -2^31; + * OV = 1 + * } else { + * res[31:0] = tmp[31:0]; + * } + * } + * Rd = res[31:0]; // RV32 + * Rd = SE64(res[31:0]); // RV64 + * ~~~ + * + * \param [in] a int type of value stored in a + * \param [in] b int type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KSLRAW(int a, int b) +{ + register long result; + __ASM volatile("kslraw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.61. KSLRAW ===== */ + +/* ===== Inline Function Start for 3.62. KSLRAW.u ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU + * \brief KSLRAW.u (Shift Left Logical with Q31 Saturation or Rounding Shift Right Arithmetic) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * KSLRAW.u Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Perform a logical left (positive) or arithmetic right (negative) shift operation with Q31 + * saturation for the left shift and a rounding up operation for the right shift on a 32-bit data. + * + * **Description**:\n + * The lower 32-bit content of Rs1 is left-shifted logically or right-shifted arithmetically + * based on the value of Rs2[5:0]. Rs2[5:0] is in the signed range of [-25, 25-1]. A positive Rs2[5:0] means + * logical left shift and a negative Rs2[5:0] means arithmetic right shift. The shift amount is the + * absolute value of Rs2[5:0] clamped to the actual shift range of [0, 31]. + * The left-shifted result is saturated to the 32-bit signed integer range of [-2^31, 2^31-1]. The right-shifted + * result is added a 1 to the most significant discarded bit position for rounding effect. After the shift, + * saturation, or rounding, the final result is bit-31 sign-extended and written to Rd. If any saturation + * happens, this instruction sets the OV flag. The value of Rs2[31:6] will not affect the operation of this + * instruction. + * + * **Operations**:\n + * ~~~ + * if (Rs2[5:0] < 0) { + * sa = -Rs2[5:0]; + * sa = (sa == 32)? 31 : sa; + * res[31:-1] = SE33(Rs1[31:(sa-1)]) + 1; + * rst[31:0] = res[31:0]; + * } else { + * sa = Rs2[5:0]; + * tmp = Rs1.W[0] <<(logic) sa; + * if (tmp > (2^31)-1) { + * rst[31:0] = (2^31)-1; + * OV = 1; + * } else if (tmp < -2^31) { + * rst[31:0] = -2^31; + * OV = 1 + * } else { + * rst[31:0] = tmp[31:0]; + * } + * } + * Rd = rst[31:0]; // RV32 + * Rd = SE64(rst[31:0]); // RV64 + * ~~~ + * + * \param [in] a int type of value stored in a + * \param [in] b int type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KSLRAW_U(int a, int b) +{ + register long result; + __ASM volatile("kslraw.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.62. KSLRAW.u ===== */ + +/* ===== Inline Function Start for 3.63. KSTAS16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB + * \brief KSTAS16 (SIMD 16-bit Signed Saturating Straight Addition & Subtraction) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KSTAS16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 16-bit signed integer element saturating addition and 16-bit signed integer element + * saturating subtraction in a 32-bit chunk simultaneously. Operands are from corresponding + * positions in 32-bit chunks. + * + * **Description**:\n + * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in + * Rs1 with the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2; at the same time, it + * subtracts the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2 from the 16-bit signed + * integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the Q15 number + * range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated + * results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit chunks in Rd for + * subtraction. + * + * **Operations**:\n + * ~~~ + * res1 = Rs1.W[x][31:16] + Rs2.W[x][31:16]; + * res2 = Rs1.W[x][15:0] - Rs2.W[x][15:0]; + * for (res in [res1, res2]) { + * if (res > (2^15)-1) { + * res = (2^15)-1; + * OV = 1; + * } else if (res < -2^15) { + * res = -2^15; + * OV = 1; + * } + * } + * Rd.W[x][31:16] = res1; + * Rd.W[x][15:0] = res2; + * for RV32, x=0 + * for RV64, x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KSTAS16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("kstas16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.63. KSTAS16 ===== */ + +/* ===== Inline Function Start for 3.64. KSTSA16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB + * \brief KSTSA16 (SIMD 16-bit Signed Saturating Straight Subtraction & Addition) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KSTSA16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 16-bit signed integer element saturating subtraction and 16-bit signed integer element + * saturating addition in a 32-bit chunk simultaneously. Operands are from corresponding positions in + * 32-bit chunks. + * + * **Description**:\n + * This instruction subtracts the 16-bit signed integer element in [31:16] of 32-bit chunks + * in Rs2 from the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1; at the same time, it + * adds the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2 with the 16-bit signed integer + * element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the Q15 number range (-2^15 + * <= Q15 <= 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated results are + * written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of 32-bit chunks in Rd for + * addition. + * + * **Operations**:\n + * ~~~ + * res1 = Rs1.W[x][31:16] - Rs2.W[x][31:16]; + * res2 = Rs1.W[x][15:0] + Rs2.W[x][15:0]; + * for (res in [res1, res2]) { + * if (res > (2^15)-1) { + * res = (2^15)-1; + * OV = 1; + * } else if (res < -2^15) { + * res = -2^15; + * OV = 1; + * } + * } + * Rd.W[x][31:16] = res1; + * Rd.W[x][15:0] = res2; + * for RV32, x=0 + * for RV64, x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KSTSA16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("kstsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.64. KSTSA16 ===== */ + +/* ===== Inline Function Start for 3.65. KSUB8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB + * \brief KSUB8 (SIMD 8-bit Signed Saturating Subtraction) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KSUB8 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 8-bit signed elements saturating subtractions simultaneously. + * + * **Description**:\n + * This instruction subtracts the 8-bit signed integer elements in Rs2 from the 8-bit + * signed integer elements in Rs1. If any of the results are beyond the Q7 number range (-2^7 <= Q7 <= 27 + * -1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd. + * + * **Operations**:\n + * ~~~ + * res[x] = Rs1.B[x] - Rs2.B[x]; + * if (res[x] > (2^7)-1) { + * res[x] = (2^7)-1; + * OV = 1; + * } else if (res[x] < -2^7) { + * res[x] = -2^7; + * OV = 1; + * } + * Rd.B[x] = res[x]; + * for RV32: x=3...0, + * for RV64: x=7...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KSUB8(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("ksub8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.65. KSUB8 ===== */ + +/* ===== Inline Function Start for 3.66. KSUB16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB + * \brief KSUB16 (SIMD 16-bit Signed Saturating Subtraction) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KSUB16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 16-bit signed integer elements saturating subtractions simultaneously. + * + * **Description**:\n + * This instruction subtracts the 16-bit signed integer elements in Rs2 from the 16-bit + * signed integer elements in Rs1. If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= + * 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to + * Rd. + * + * **Operations**:\n + * ~~~ + * res[x] = Rs1.H[x] - Rs2.H[x]; + * if (res[x] > (2^15)-1) { + * res[x] = (2^15)-1; + * OV = 1; + * } else if (res[x] < -2^15) { + * res[x] = -2^15; + * OV = 1; + * } + * Rd.H[x] = res[x]; + * for RV32: x=1...0, + * for RV64: x=3...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KSUB16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("ksub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.66. KSUB16 ===== */ + +/* ===== Inline Function Start for 3.67. KSUB64 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB + * \brief KSUB64 (64-bit Signed Saturating Subtraction) + * \details + * **Type**: DSP (64-bit Profile) + * + * **Syntax**:\n + * ~~~ + * KSUB64 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Perform a 64-bit signed integer subtraction. The result is saturated to the Q63 range. + * + * **RV32 Description**:\n + * This instruction subtracts the 64-bit signed integer of an even/odd pair of + * registers specified by Rs2(4,1) from the 64-bit signed integer of an even/odd pair of registers + * specified by Rs1(4,1). If the 64-bit result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is + * saturated to the range and the OV bit is set to 1. The saturated result is then written to an even/odd + * pair of registers specified by Rd(4,1). + * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair + * includes register 2d and 2d+1. + * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d` + * register of the pair contains the low 32-bit of the operand. + * + * **RV64 Description**:\n + * This instruction subtracts the 64-bit signed integer of Rs2 from the 64-bit signed + * integer of Rs1. If the 64-bit result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated + * to the range and the OV bit is set to 1. The saturated result is then written to Rd. + * + * **Operations**:\n + * ~~~ + * RV32: + * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1); + * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1); + * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1); + * result = R[a_H].R[a_L] - R[b_H].R[b_L]; + * if (result > (2^63)-1) { + * result = (2^63)-1; OV = 1; + * } else if (result < -2^63) { + * result = -2^63; OV = 1; + * } + * R[t_H].R[t_L] = result; + * RV64: + * result = Rs1 - Rs2; + * if (result > (2^63)-1) { + * result = (2^63)-1; OV = 1; + * } else if (result < -2^63) { + * result = -2^63; OV = 1; + * } + * Rd = result; + * ~~~ + * + * \param [in] a long long type of value stored in a + * \param [in] b long long type of value stored in b + * \return value stored in long long type + */ +__STATIC_FORCEINLINE long long __RV_KSUB64(long long a, long long b) +{ + register long long result; + __ASM volatile("ksub64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.67. KSUB64 ===== */ + +/* ===== Inline Function Start for 3.68. KSUBH ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU + * \brief KSUBH (Signed Subtraction with Q15 Saturation) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * KSUBH Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Subtract the signed lower 32-bit content of two registers with Q15 saturation. + * + * **Description**:\n + * The signed lower 32-bit content of Rs2 is subtracted from the signed lower 32-bit + * content of Rs1. And the result is saturated to the 16-bit signed integer range of [-2^15, 2^15-1] and then + * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag. + * + * **Operations**:\n + * ~~~ + * tmp = Rs1.W[0] - Rs2.W[0]; + * if (tmp > (2^15)-1) { + * res = (2^15)-1; + * OV = 1; + * } else if (tmp < -2^15) { + * res = -2^15; + * OV = 1 + * } else { + * res = tmp; + * } + * Rd = SE(res[15:0]); + * ~~~ + * + * \param [in] a int type of value stored in a + * \param [in] b int type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KSUBH(int a, int b) +{ + register long result; + __ASM volatile("ksubh %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.68. KSUBH ===== */ + +/* ===== Inline Function Start for 3.69. KSUBW ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU + * \brief KSUBW (Signed Subtraction with Q31 Saturation) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * KSUBW Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Subtract the signed lower 32-bit content of two registers with Q31 saturation. + * + * **Description**:\n + * The signed lower 32-bit content of Rs2 is subtracted from the signed lower 32-bit + * content of Rs1. And the result is saturated to the 32-bit signed integer range of [-2^31, 2^31-1] and then + * sign-extened and written to Rd. If saturation happens, this instruction sets the OV flag. + * + * **Operations**:\n + * ~~~ + * tmp = Rs1.W[0] - Rs2.W[0]; + * if (tmp > (2^31)-1) { + * res = (2^31)-1; + * OV = 1; + * } else if (tmp < -2^31) { + * res = -2^31; + * OV = 1 + * } else { + * res = tmp; + * } + * Rd = res[31:0]; // RV32 + * Rd = SE(res[31:0]); // RV64 + * ~~~ + * + * \param [in] a int type of value stored in a + * \param [in] b int type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KSUBW(int a, int b) +{ + register long result; + __ASM volatile("ksubw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.69. KSUBW ===== */ + +/* ===== Inline Function Start for 3.70.1. KWMMUL ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC + * \brief KWMMUL (SIMD Saturating MSW Signed Multiply Word & Double) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KWMMUL Rd, Rs1, Rs2 + * KWMMUL.u Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the signed 32-bit integer elements of two registers, shift the results left 1-bit, + * saturate, and write the most significant 32-bit results to a register. The `.u` form additionally + * rounds up the multiplication results from the most signification discarded bit. + * + * **Description**:\n + * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2. It then shifts + * the multiplication results one bit to the left and takes the most significant 32-bit results. If the + * shifted result is greater than 2^31-1, it is saturated to 2^31-1 and the OV flag is set to 1. The final element + * result is written to Rd. The 32-bit elements of Rs1 and Rs2 are treated as signed integers. The `.u` + * form of the instruction additionally rounds up the 64-bit multiplication results by adding a 1 to bit + * 30 before the shift and saturation operations. + * + * **Operations**:\n + * ~~~ + * if ((0x80000000 != Rs1.W[x]) | (0x80000000 != Rs2.W[x])) { + * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x]; + * if (`.u` form) { + * Round[x][33:0] = Mres[x][63:30] + 1; + * Rd.W[x] = Round[x][32:1]; + * } else { + * Rd.W[x] = Mres[x][62:31]; + * } + * } else { + * Rd.W[x] = 0x7fffffff; + * OV = 1; + * } + * for RV32: x=0 + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a long type of value stored in a + * \param [in] b long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KWMMUL(long a, long b) +{ + register long result; + __ASM volatile("kwmmul %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.70.1. KWMMUL ===== */ + +/* ===== Inline Function Start for 3.70.2. KWMMUL.u ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC + * \brief KWMMUL.u (SIMD Saturating MSW Signed Multiply Word & Double with Rounding) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * KWMMUL Rd, Rs1, Rs2 + * KWMMUL.u Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the signed 32-bit integer elements of two registers, shift the results left 1-bit, + * saturate, and write the most significant 32-bit results to a register. The `.u` form additionally + * rounds up the multiplication results from the most signification discarded bit. + * + * **Description**:\n + * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2. It then shifts + * the multiplication results one bit to the left and takes the most significant 32-bit results. If the + * shifted result is greater than 2^31-1, it is saturated to 2^31-1 and the OV flag is set to 1. The final element + * result is written to Rd. The 32-bit elements of Rs1 and Rs2 are treated as signed integers. The `.u` + * form of the instruction additionally rounds up the 64-bit multiplication results by adding a 1 to bit + * 30 before the shift and saturation operations. + * + * **Operations**:\n + * ~~~ + * if ((0x80000000 != Rs1.W[x]) | (0x80000000 != Rs2.W[x])) { + * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x]; + * if (`.u` form) { + * Round[x][33:0] = Mres[x][63:30] + 1; + * Rd.W[x] = Round[x][32:1]; + * } else { + * Rd.W[x] = Mres[x][62:31]; + * } + * } else { + * Rd.W[x] = 0x7fffffff; + * OV = 1; + * } + * for RV32: x=0 + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a long type of value stored in a + * \param [in] b long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KWMMUL_U(long a, long b) +{ + register long result; + __ASM volatile("kwmmul.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.70.2. KWMMUL.u ===== */ + +/* ===== Inline Function Start for 3.71. MADDR32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC + * \brief MADDR32 (Multiply and Add to 32-Bit Word) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * MADDR32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the 32-bit contents of two registers and add the lower 32-bit multiplication result + * to the 32-bit content of a destination register. Write the final result back to the destination register. + * + * **Description**:\n + * This instruction multiplies the lower 32-bit content of Rs1 with that of Rs2. It adds the + * lower 32-bit multiplication result to the lower 32-bit content of Rd and writes the final result (RV32) + * or sign-extended result (RV64) back to Rd. The contents of Rs1 and Rs2 can be either signed or + * unsigned integers. + * + * **Operations**:\n + * ~~~ + * RV32: + * Mresult = Rs1 * Rs2; + * Rd = Rd + Mresult.W[0]; + * RV64: + * Mresult = Rs1.W[0] * Rs2.W[0]; + * tres[31:0] = Rd.W[0] + Mresult.W[0]; + * Rd = SE64(tres[31:0]); + * ~~~ + * + * \param [in] t unsigned long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_MADDR32(unsigned long t, unsigned long a, unsigned long b) +{ + __ASM volatile("maddr32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.71. MADDR32 ===== */ + +/* ===== Inline Function Start for 3.72. MAXW ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION + * \brief MAXW (32-bit Signed Word Maximum) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * MAXW Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Get the larger value from the 32-bit contents of two general registers. + * + * **Description**:\n + * This instruction compares two signed 32-bit integers stored in Rs1 and Rs2, picks the + * larger value as the result, and writes the result to Rd. + * + * **Operations**:\n + * ~~~ + * if (Rs1.W[0] >= Rs2.W[0]) { + * Rd = SE(Rs1.W[0]); + * } else { + * Rd = SE(Rs2.W[0]); + * } + * ~~~ + * + * \param [in] a int type of value stored in a + * \param [in] b int type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_MAXW(int a, int b) +{ + register long result; + __ASM volatile("maxw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.72. MAXW ===== */ + +/* ===== Inline Function Start for 3.73. MINW ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION + * \brief MINW (32-bit Signed Word Minimum) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * MINW Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Get the smaller value from the 32-bit contents of two general registers. + * + * **Description**:\n + * This instruction compares two signed 32-bit integers stored in Rs1 and Rs2, picks the + * smaller value as the result, and writes the result to Rd. + * + * **Operations**:\n + * ~~~ + * if (Rs1.W[0] >= Rs2.W[0]) { Rd = SE(Rs2.W[0]); } else { Rd = SE(Rs1.W[0]); } + * ~~~ + * + * \param [in] a int type of value stored in a + * \param [in] b int type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_MINW(int a, int b) +{ + register long result; + __ASM volatile("minw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.73. MINW ===== */ + +/* ===== Inline Function Start for 3.74. MSUBR32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC + * \brief MSUBR32 (Multiply and Subtract from 32-Bit Word) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * MSUBR32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the 32-bit contents of two registers and subtract the lower 32-bit multiplication + * result from the 32-bit content of a destination register. Write the final result back to the destination + * register. + * + * **Description**:\n + * This instruction multiplies the lower 32-bit content of Rs1 with that of Rs2, subtracts + * the lower 32-bit multiplication result from the lower 32-bit content of Rd, then writes the final + * result (RV32) or sign-extended result (RV64) back to Rd. The contents of Rs1 and Rs2 can be either + * signed or unsigned integers. + * + * **Operations**:\n + * ~~~ + * RV32: + * Mresult = Rs1 * Rs2; + * Rd = Rd - Mresult.W[0]; + * RV64: + * Mresult = Rs1.W[0] * Rs2.W[0]; + * tres[31:0] = Rd.W[0] - Mresult.W[0]; + * Rd = SE64(tres[31:0]); + * ~~~ + * + * \param [in] t unsigned long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_MSUBR32(unsigned long t, unsigned long a, unsigned long b) +{ + __ASM volatile("msubr32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.74. MSUBR32 ===== */ + +/* ===== Inline Function Start for 3.75. MULR64 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION + * \brief MULR64 (Multiply Word Unsigned to 64-bit Data) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * MULR64 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the 32-bit unsigned integer contents of two registers and write the 64-bit result. + * + * **RV32 Description**:\n + * This instruction multiplies the 32-bit content of Rs1 with that of Rs2 and writes the 64-bit + * multiplication result to an even/odd pair of registers containing Rd. Rd(4,1) index d determines the + * even/odd pair group of the two registers. Specifically, the register pair includes register 2d and + * 2d+1. + * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register + * of the pair contains the low 32-bit of the result. + * The lower 32-bit contents of Rs1 and Rs2 are treated as unsigned integers. + * + * **RV64 Description**:\n + * This instruction multiplies the lower 32-bit content of Rs1 with that of Rs2 and writes the 64-bit + * multiplication result to Rd. + * The lower 32-bit contents of Rs1 and Rs2 are treated as unsigned integers. + * + * **Operations**:\n + * ~~~ + * RV32: + * Mresult = CONCAT(1`b0,Rs1) u* CONCAT(1`b0,Rs2); + * R[Rd(4,1).1(0)][31:0] = Mresult[63:32]; + * R[Rd(4,1).0(0)][31:0] = Mresult[31:0]; + * RV64: + * Rd = Mresult[63:0]; + * Mresult = CONCAT(1`b0,Rs1.W[0]) u* CONCAT(1`b0,Rs2.W[0]); + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long long type + */ +__STATIC_FORCEINLINE unsigned long long __RV_MULR64(unsigned long a, unsigned long b) +{ + register unsigned long long result; + __ASM volatile("mulr64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.75. MULR64 ===== */ + +/* ===== Inline Function Start for 3.76. MULSR64 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION + * \brief MULSR64 (Multiply Word Signed to 64-bit Data) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * MULSR64 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the 32-bit signed integer contents of two registers and write the 64-bit result. + * + * **RV32 Description**:\n + * This instruction multiplies the lower 32-bit content of Rs1 with the lower 32-bit content of Rs2 and + * writes the 64-bit multiplication result to an even/odd pair of registers containing Rd. Rd(4,1) index d + * determines the even/odd pair group of the two registers. Specifically, the register pair includes + * register 2d and 2d+1. + * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register + * of the pair contains the low 32-bit of the result. + * The lower 32-bit contents of Rs1 and Rs2 are treated as signed integers. + * + * **RV64 Description**:\n + * This instruction multiplies the lower 32-bit content of Rs1 with the lower 32-bit content of Rs2 and + * writes the 64-bit multiplication result to Rd. + * The lower 32-bit contents of Rs1 and Rs2 are treated as signed integers. + * + * **Operations**:\n + * ~~~ + * RV32: + * Mresult = Ra s* Rb; + * R[Rd(4,1).1(0)][31:0] = Mresult[63:32]; + * R[Rd(4,1).0(0)][31:0] = Mresult[31:0]; + * RV64: + * Mresult = Ra.W[0] s* Rb.W[0]; + * Rd = Mresult[63:0]; + * ~~~ + * + * \param [in] a long type of value stored in a + * \param [in] b long type of value stored in b + * \return value stored in long long type + */ +__STATIC_FORCEINLINE long long __RV_MULSR64(long a, long b) +{ + register long long result; + __ASM volatile("mulsr64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.76. MULSR64 ===== */ + +/* ===== Inline Function Start for 3.77. PBSAD ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC + * \brief PBSAD (Parallel Byte Sum of Absolute Difference) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * PBSAD Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Calculate the sum of absolute difference of unsigned 8-bit data elements. + * + * **Description**:\n + * This instruction subtracts the un-signed 8-bit elements of Rs2 from those of Rs1. Then + * it adds the absolute value of each difference together and writes the result to Rd. + * + * **Operations**:\n + * ~~~ + * absdiff[x] = ABS(Rs1.B[x] - Rs2.B[x]); + * Rd = SUM(absdiff[x]); + * for RV32: x=3...0, + * for RV64: x=7...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_PBSAD(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("pbsad %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.77. PBSAD ===== */ + +/* ===== Inline Function Start for 3.78. PBSADA ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC + * \brief PBSADA (Parallel Byte Sum of Absolute Difference Accum) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * PBSADA Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Calculate the sum of absolute difference of four unsigned 8-bit data elements and + * accumulate it into a register. + * + * **Description**:\n + * This instruction subtracts the un-signed 8-bit elements of Rs2 from those of Rs1. It + * then adds the absolute value of each difference together along with the content of Rd and writes the + * accumulated result back to Rd. + * + * **Operations**:\n + * ~~~ + * absdiff[x] = ABS(Rs1.B[x] - Rs2.B[x]); + * Rd = Rd + SUM(absdiff[x]); + * for RV32: x=3...0, + * for RV64: x=7...0 + * ~~~ + * + * \param [in] t unsigned long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_PBSADA(unsigned long t, unsigned long a, unsigned long b) +{ + __ASM volatile("pbsada %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.78. PBSADA ===== */ + +/* ===== Inline Function Start for 3.79.1. PKBB16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK + * \brief PKBB16 (Pack Two 16-bit Data from Both Bottom Half) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * PKBB16 Rd, Rs1, Rs2 + * PKBT16 Rd, Rs1, Rs2 + * PKTT16 Rd, Rs1, Rs2 + * PKTB16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Pack 16-bit data from 32-bit chunks in two registers. + * * PKBB16: bottom.bottom + * * PKBT16 bottom.top + * * PKTT16 top.top + * * PKTB16 top.bottom + * + * **Description**:\n + * (PKBB16) moves Rs1.W[x][15:0] to Rd.W[x][31:16] and moves Rs2.W[x] [15:0] to + * Rd.W[x] [15:0]. + * (PKBT16) moves Rs1.W[x] [15:0] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0]. + * (PKTT16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0]. + * (PKTB16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0]. + * + * **Operations**:\n + * ~~~ + * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][15:0]); // PKBB16 + * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][31:16]); // PKBT16 + * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][15:0]); // PKTB16 + * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][31:16]); // PKTT16 + * for RV32: x=0, + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_PKBB16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("pkbb16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.79.1. PKBB16 ===== */ + +/* ===== Inline Function Start for 3.79.2. PKBT16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK + * \brief PKBT16 (Pack Two 16-bit Data from Bottom and Top Half) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * PKBB16 Rd, Rs1, Rs2 + * PKBT16 Rd, Rs1, Rs2 + * PKTT16 Rd, Rs1, Rs2 + * PKTB16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Pack 16-bit data from 32-bit chunks in two registers. + * * PKBB16: bottom.bottom + * * PKBT16 bottom.top + * * PKTT16 top.top + * * PKTB16 top.bottom + * + * **Description**:\n + * (PKBB16) moves Rs1.W[x][15:0] to Rd.W[x][31:16] and moves Rs2.W[x] [15:0] to + * Rd.W[x] [15:0]. + * (PKBT16) moves Rs1.W[x] [15:0] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0]. + * (PKTT16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0]. + * (PKTB16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0]. + * + * **Operations**:\n + * ~~~ + * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][15:0]); // PKBB16 + * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][31:16]); // PKBT16 + * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][15:0]); // PKTB16 + * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][31:16]); // PKTT16 + * for RV32: x=0, + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_PKBT16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("pkbt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.79.2. PKBT16 ===== */ + +/* ===== Inline Function Start for 3.79.3. PKTT16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK + * \brief PKTT16 (Pack Two 16-bit Data from Both Top Half) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * PKBB16 Rd, Rs1, Rs2 + * PKBT16 Rd, Rs1, Rs2 + * PKTT16 Rd, Rs1, Rs2 + * PKTB16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Pack 16-bit data from 32-bit chunks in two registers. + * * PKBB16: bottom.bottom + * * PKBT16 bottom.top + * * PKTT16 top.top + * * PKTB16 top.bottom + * + * **Description**:\n + * (PKBB16) moves Rs1.W[x][15:0] to Rd.W[x][31:16] and moves Rs2.W[x] [15:0] to + * Rd.W[x] [15:0]. + * (PKBT16) moves Rs1.W[x] [15:0] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0]. + * (PKTT16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0]. + * (PKTB16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0]. + * + * **Operations**:\n + * ~~~ + * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][15:0]); // PKBB16 + * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][31:16]); // PKBT16 + * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][15:0]); // PKTB16 + * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][31:16]); // PKTT16 + * for RV32: x=0, + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_PKTT16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("pktt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.79.3. PKTT16 ===== */ + +/* ===== Inline Function Start for 3.79.4. PKTB16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK + * \brief PKTB16 (Pack Two 16-bit Data from Top and Bottom Half) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * PKBB16 Rd, Rs1, Rs2 + * PKBT16 Rd, Rs1, Rs2 + * PKTT16 Rd, Rs1, Rs2 + * PKTB16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Pack 16-bit data from 32-bit chunks in two registers. + * * PKBB16: bottom.bottom + * * PKBT16 bottom.top + * * PKTT16 top.top + * * PKTB16 top.bottom + * + * **Description**:\n + * (PKBB16) moves Rs1.W[x][15:0] to Rd.W[x][31:16] and moves Rs2.W[x] [15:0] to + * Rd.W[x] [15:0]. + * (PKBT16) moves Rs1.W[x] [15:0] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0]. + * (PKTT16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0]. + * (PKTB16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0]. + * + * **Operations**:\n + * ~~~ + * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][15:0]); // PKBB16 + * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][31:16]); // PKBT16 + * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][15:0]); // PKTB16 + * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][31:16]); // PKTT16 + * for RV32: x=0, + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_PKTB16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("pktb16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.79.4. PKTB16 ===== */ + +/* ===== Inline Function Start for 3.80. RADD8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB + * \brief RADD8 (SIMD 8-bit Signed Halving Addition) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * RADD8 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 8-bit signed integer element additions simultaneously. The element results are halved + * to avoid overflow or saturation. + * + * **Description**:\n + * This instruction adds the 8-bit signed integer elements in Rs1 with the 8-bit signed + * integer elements in Rs2. The results are first arithmetically right-shifted by 1 bit and then written to + * Rd. + * + * **Examples**:\n + * ~~~ + * * Rs1 = 0x7F, Rs2 = 0x7F, Rd = 0x7F + * * Rs1 = 0x80, Rs2 = 0x80, Rd = 0x80 + * * Rs1 = 0x40, Rs2 = 0x80, Rd = 0xE0 + * ~~~ + * + * **Operations**:\n + * ~~~ + * Rd.B[x] = (Rs1.B[x] + Rs2.B[x]) s>> 1; for RV32: x=3...0, for RV64: x=7...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_RADD8(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("radd8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.80. RADD8 ===== */ + +/* ===== Inline Function Start for 3.81. RADD16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB + * \brief RADD16 (SIMD 16-bit Signed Halving Addition) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * RADD16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 16-bit signed integer element additions simultaneously. The results are halved to avoid + * overflow or saturation. + * + * **Description**:\n + * This instruction adds the 16-bit signed integer elements in Rs1 with the 16-bit signed + * integer elements in Rs2. The results are first arithmetically right-shifted by 1 bit and then written to + * Rd. + * + * **Examples**:\n + * ~~~ + * * Rs1 = 0x7FFF, Rs2 = 0x7FFF, Rd = 0x7FFF + * * Rs1 = 0x8000, Rs2 = 0x8000, Rd = 0x8000 + * * Rs1 = 0x4000, Rs2 = 0x8000, Rd = 0xE000 + * ~~~ + * + * **Operations**:\n + * ~~~ + * Rd.H[x] = (Rs1.H[x] + Rs2.H[x]) s>> 1; for RV32: x=1...0, for RV64: x=3...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_RADD16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("radd16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.81. RADD16 ===== */ + +/* ===== Inline Function Start for 3.82. RADD64 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB + * \brief RADD64 (64-bit Signed Halving Addition) + * \details + * **Type**: DSP (64-bit Profile) + * + * **Syntax**:\n + * ~~~ + * RADD64 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Add two 64-bit signed integers. The result is halved to avoid overflow or saturation. + * + * **RV32 Description**:\n + * This instruction adds the 64-bit signed integer of an even/odd pair of registers + * specified by Rs1(4,1) with the 64-bit signed integer of an even/odd pair of registers specified by + * Rs2(4,1). The 64-bit addition result is first arithmetically right-shifted by 1 bit and then written to an + * even/odd pair of registers specified by Rd(4,1). + * Rx(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the register + * pair includes register 2d and 2d+1. + * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register + * of the pair contains the low 32-bit of the result. + * + * **RV64 Description**:\n + * This instruction adds the 64-bit signed integer in Rs1 with the 64-bit signed + * integer in Rs2. The 64-bit addition result is first arithmetically right-shifted by 1 bit and then + * written to Rd. + * + * **Operations**:\n + * ~~~ + * RV32: + * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1); + * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1); + * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1); + * R[t_H].R[t_L] = (R[a_H].R[a_L] + R[b_H].R[b_L]) s>> 1; + * RV64: + * Rd = (Rs1 + Rs2) s>> 1; + * ~~~ + * + * \param [in] a long long type of value stored in a + * \param [in] b long long type of value stored in b + * \return value stored in long long type + */ +__STATIC_FORCEINLINE long long __RV_RADD64(long long a, long long b) +{ + register long long result; + __ASM volatile("radd64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.82. RADD64 ===== */ + +/* ===== Inline Function Start for 3.83. RADDW ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION + * \brief RADDW (32-bit Signed Halving Addition) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * RADDW Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Add 32-bit signed integers and the results are halved to avoid overflow or saturation. + * + * **Description**:\n + * This instruction adds the first 32-bit signed integer in Rs1 with the first 32-bit signed + * integer in Rs2. The result is first arithmetically right-shifted by 1 bit and then sign-extended and + * written to Rd. + * + * **Examples**:\n + * ~~~ + * * Rs1 = 0x7FFFFFFF, Rs2 = 0x7FFFFFFF, Rd = 0x7FFFFFFF + * * Rs1 = 0x80000000, Rs2 = 0x80000000, Rd = 0x80000000 + * * Rs1 = 0x40000000, Rs2 = 0x80000000, Rd = 0xE0000000 + * ~~~ + * + * **Operations**:\n + * ~~~ + * RV32: + * Rd[31:0] = (Rs1[31:0] + Rs2[31:0]) s>> 1; + * RV64: + * resw[31:0] = (Rs1[31:0] + Rs2[31:0]) s>> 1; + * Rd[63:0] = SE(resw[31:0]); + * ~~~ + * + * \param [in] a int type of value stored in a + * \param [in] b int type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_RADDW(int a, int b) +{ + register long result; + __ASM volatile("raddw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.83. RADDW ===== */ + +/* ===== Inline Function Start for 3.84. RCRAS16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB + * \brief RCRAS16 (SIMD 16-bit Signed Halving Cross Addition & Subtraction) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * RCRAS16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 16-bit signed integer element addition and 16-bit signed integer element subtraction in + * a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit chunks. The results + * are halved to avoid overflow or saturation. + * + * **Description**:\n + * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in + * Rs1 with the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2, and subtracts the 16-bit + * signed integer element in [31:16] of 32-bit chunks in Rs2 from the 16-bit signed integer element in + * [15:0] of 32-bit chunks in Rs1. The element results are first arithmetically right-shifted by 1 bit and + * then written to [31:16] of 32-bit chunks in Rd and [15:0] of 32-bit chunks in Rd. + * + * **Examples**:\n + * ~~~ + * Please see `RADD16` and `RSUB16` instructions. + * ~~~ + * + * **Operations**:\n + * ~~~ + * Rd.W[x][31:16] = (Rs1.W[x][31:16] + Rs2.W[x][15:0]) s>> 1; + * Rd.W[x][15:0] = (Rs1.W[x][15:0] - Rs2.W[x][31:16]) s>> 1; + * for RV32, x=0 + * for RV64, x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_RCRAS16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("rcras16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.84. RCRAS16 ===== */ + +/* ===== Inline Function Start for 3.85. RCRSA16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB + * \brief RCRSA16 (SIMD 16-bit Signed Halving Cross Subtraction & Addition) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * RCRSA16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 16-bit signed integer element subtraction and 16-bit signed integer element addition in + * a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit chunks. The results + * are halved to avoid overflow or saturation. + * + * **Description**:\n + * This instruction subtracts the 16-bit signed integer element in [15:0] of 32-bit chunks + * in Rs2 from the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1, and adds the 16-bit + * signed element integer in [15:0] of 32-bit chunks in Rs1 with the 16-bit signed integer element in + * [31:16] of 32-bit chunks in Rs2. The two results are first arithmetically right-shifted by 1 bit and + * then written to [31:16] of 32-bit chunks in Rd and [15:0] of 32-bit chunks in Rd. + * + * **Examples**:\n + * ~~~ + * Please see `RADD16` and `RSUB16` instructions. + * ~~~ + * + * **Operations**:\n + * ~~~ + * Rd.W[x][31:16] = (Rs1.W[x][31:16] - Rs2.W[x][15:0]) s>> 1; + * Rd.W[x][15:0] = (Rs1.W[x][15:0] + Rs2.W[x][31:16]) s>> 1; + * for RV32, x=0 + * for RV64, x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_RCRSA16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("rcrsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.85. RCRSA16 ===== */ + +/* ===== Inline Function Start for 3.86. RDOV ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_OV_FLAG_SC + * \brief RDOV (Read OV flag) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * RDOV Rd # pseudo mnemonic + * ~~~ + * + * **Purpose**:\n + * This pseudo instruction is an alias to `CSRR Rd, ucode` instruction which maps to the real + * instruction of `CSRRS Rd, ucode, x0`. + * + * + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_RDOV(void) +{ + register unsigned long result; + __ASM volatile("rdov %0" : "=r"(result)); + return result; +} +/* ===== Inline Function End for 3.86. RDOV ===== */ + +/* ===== Inline Function Start for 3.87. RSTAS16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB + * \brief RSTAS16 (SIMD 16-bit Signed Halving Straight Addition & Subtraction) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * RSTAS16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 16-bit signed integer element addition and 16-bit signed integer element subtraction in + * a 32-bit chunk simultaneously. Operands are from corresponding positions in 32-bit chunks. The + * results are halved to avoid overflow or saturation. + * + * **Description**:\n + * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in + * Rs1 with the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2, and subtracts the 16-bit + * signed integer element in [15:0] of 32-bit chunks in Rs2 from the 16-bit signed integer element in + * [15:0] of 32-bit chunks in Rs1. The element results are first arithmetically right-shifted by 1 bit and + * then written to [31:16] of 32-bit chunks in Rd and [15:0] of 32-bit chunks in Rd. + * + * **Examples**:\n + * ~~~ + * Please see `RADD16` and `RSUB16` instructions. + * ~~~ + * + * **Operations**:\n + * ~~~ + * Rd.W[x][31:16] = (Rs1.W[x][31:16] + Rs2.W[x][31:16]) s>> 1; + * Rd.W[x][15:0] = (Rs1.W[x][15:0] - Rs2.W[x][15:0]) s>> 1; + * for RV32, x=0 + * for RV64, x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_RSTAS16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("rstas16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.87. RSTAS16 ===== */ + +/* ===== Inline Function Start for 3.88. RSTSA16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB + * \brief RSTSA16 (SIMD 16-bit Signed Halving Straight Subtraction & Addition) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * RSTSA16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 16-bit signed integer element subtraction and 16-bit signed integer element addition in + * a 32-bit chunk simultaneously. Operands are from corresponding positions in 32-bit chunks. The + * results are halved to avoid overflow or saturation. + * + * **Description**:\n + * This instruction subtracts the 16-bit signed integer element in [31:16] of 32-bit chunks + * in Rs2 from the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1, and adds the 16-bit + * signed element integer in [15:0] of 32-bit chunks in Rs1 with the 16-bit signed integer element in + * [15:0] of 32-bit chunks in Rs2. The two results are first arithmetically right-shifted by 1 bit and then + * written to [31:16] of 32-bit chunks in Rd and [15:0] of 32-bit chunks in Rd. + * + * **Examples**:\n + * ~~~ + * Please see `RADD16` and `RSUB16` instructions. + * ~~~ + * + * **Operations**:\n + * ~~~ + * Rd.W[x][31:16] = (Rs1.W[x][31:16] - Rs2.W[x][31:16]) s>> 1; + * Rd.W[x][15:0] = (Rs1.W[x][15:0] + Rs2.W[x][15:0]) s>> 1; + * for RV32, x=0 + * for RV64, x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_RSTSA16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("rstsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.88. RSTSA16 ===== */ + +/* ===== Inline Function Start for 3.89. RSUB8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB + * \brief RSUB8 (SIMD 8-bit Signed Halving Subtraction) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * RSUB8 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 8-bit signed integer element subtractions simultaneously. The results are halved to + * avoid overflow or saturation. + * + * **Description**:\n + * This instruction subtracts the 8-bit signed integer elements in Rs2 from the 8-bit + * signed integer elements in Rs1. The results are first arithmetically right-shifted by 1 bit and then + * written to Rd. + * + * **Examples**:\n + * ~~~ + * * Rs1 = 0x7F, Rs2 = 0x80, Rd = 0x7F + * * Rs1 = 0x80, Rs2 = 0x7F, Rd = 0x80 + * * Rs1= 0x80, Rs2 = 0x40, Rd = 0xA0 + * ~~~ + * + * **Operations**:\n + * ~~~ + * Rd.B[x] = (Rs1.B[x] - Rs2.B[x]) s>> 1; + * for RV32: x=3...0, + * for RV64: x=7...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_RSUB8(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("rsub8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.89. RSUB8 ===== */ + +/* ===== Inline Function Start for 3.90. RSUB16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB + * \brief RSUB16 (SIMD 16-bit Signed Halving Subtraction) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * RSUB16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 16-bit signed integer element subtractions simultaneously. The results are halved to + * avoid overflow or saturation. + * + * **Description**:\n + * This instruction subtracts the 16-bit signed integer elements in Rs2 from the 16-bit + * signed integer elements in Rs1. The results are first arithmetically right-shifted by 1 bit and then + * written to Rd. + * + * **Examples**:\n + * ~~~ + * * Ra = 0x7FFF, Rb = 0x8000, Rt = 0x7FFF + * * Ra = 0x8000, Rb = 0x7FFF, Rt = 0x8000 + * * Ra = 0x8000, Rb = 0x4000, Rt = 0xA000 + * ~~~ + * + * **Operations**:\n + * ~~~ + * Rd.H[x] = (Rs1.H[x] - Rs2.H[x]) s>> 1; + * for RV32: x=1...0, + * for RV64: x=3...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_RSUB16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("rsub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.90. RSUB16 ===== */ + +/* ===== Inline Function Start for 3.91. RSUB64 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB + * \brief RSUB64 (64-bit Signed Halving Subtraction) + * \details + * **Type**: DSP (64-bit Profile) + * + * **Syntax**:\n + * ~~~ + * RSUB64 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Perform a 64-bit signed integer subtraction. The result is halved to avoid overflow or + * saturation. + * + * **RV32 Description**:\n + * This instruction subtracts the 64-bit signed integer of an even/odd pair of + * registers specified by Rb(4,1) from the 64-bit signed integer of an even/odd pair of registers + * specified by Ra(4,1). The subtraction result is first arithmetically right-shifted by 1 bit and then + * written to an even/odd pair of registers specified by Rt(4,1). + * Rx(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the register + * pair includes register 2d and 2d+1. + * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register + * of the pair contains the low 32-bit of the result. + * + * **RV64 Description**:\n + * This instruction subtracts the 64-bit signed integer in Rs2 from the 64-bit signed + * integer in Rs1. The 64-bit subtraction result is first arithmetically right-shifted by 1 bit and then + * written to Rd. + * + * **Operations**:\n + * ~~~ + * RV32: + * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1); + * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1); + * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1); + * R[t_H].R[t_L] = (R[a_H].R[a_L] - R[b_H].R[b_L]) s>> 1; + * RV64: + * Rd = (Rs1 - Rs2) s>> 1; + * ~~~ + * + * \param [in] a long long type of value stored in a + * \param [in] b long long type of value stored in b + * \return value stored in long long type + */ +__STATIC_FORCEINLINE long long __RV_RSUB64(long long a, long long b) +{ + register long long result; + __ASM volatile("rsub64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.91. RSUB64 ===== */ + +/* ===== Inline Function Start for 3.92. RSUBW ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION + * \brief RSUBW (32-bit Signed Halving Subtraction) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * RSUBW Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Subtract 32-bit signed integers and the result is halved to avoid overflow or saturation. + * + * **Description**:\n + * This instruction subtracts the first 32-bit signed integer in Rs2 from the first 32-bit + * signed integer in Rs1. The result is first arithmetically right-shifted by 1 bit and then sign-extended + * and written to Rd. + * + * **Examples**:\n + * ~~~ + * * Rs1 = 0x7FFFFFFF, Rs2 = 0x80000000, Rd = 0x7FFFFFFF + * * Rs1 = 0x80000000, Rs2 = 0x7FFFFFFF, Rd = 0x80000000 + * * Rs1 = 0x80000000, Rs2 = 0x40000000, Rd = 0xA0000000 + * ~~~ + * + * **Operations**:\n + * ~~~ + * RV32: + * Rd[31:0] = (Rs1[31:0] - Rs2[31:0]) s>> 1; + * RV64: + * resw[31:0] = (Rs1[31:0] - Rs2[31:0]) s>> 1; + * Rd[63:0] = SE(resw[31:0]); + * ~~~ + * + * \param [in] a int type of value stored in a + * \param [in] b int type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_RSUBW(int a, int b) +{ + register long result; + __ASM volatile("rsubw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.92. RSUBW ===== */ + +/* ===== Inline Function Start for 3.93. SCLIP8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC + * \brief SCLIP8 (SIMD 8-bit Signed Clip Value) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SCLIP8 Rd, Rs1, imm3u[2:0] + * ~~~ + * + * **Purpose**:\n + * Limit the 8-bit signed integer elements of a register into a signed range simultaneously. + * + * **Description**:\n + * This instruction limits the 8-bit signed integer elements stored in Rs1 into a signed + * integer range between 2^imm3u-1 and -2^imm3u, and writes the limited results to Rd. For example, if + * imm3u is 3, the 8-bit input values should be saturated between 7 and -8. If saturation is performed, + * set OV bit to 1. + * + * **Operations**:\n + * ~~~ + * src = Rs1.B[x]; + * if (src > (2^imm3u)-1) { + * src = (2^imm3u)-1; + * OV = 1; + * } else if (src < -2^imm3u) { + * src = -2^imm3u; + * OV = 1; + * } + * Rd.B[x] = src + * for RV32: x=3...0, + * for RV64: x=7...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +#define __RV_SCLIP8(a, b) \ + ({ \ + register unsigned long result; \ + register unsigned long __a = (unsigned long)(a); \ + __ASM volatile("sclip8 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ + result; \ + }) +/* ===== Inline Function End for 3.93. SCLIP8 ===== */ + +/* ===== Inline Function Start for 3.94. SCLIP16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC + * \brief SCLIP16 (SIMD 16-bit Signed Clip Value) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SCLIP16 Rd, Rs1, imm4u[3:0] + * ~~~ + * + * **Purpose**:\n + * Limit the 16-bit signed integer elements of a register into a signed range simultaneously. + * + * **Description**:\n + * This instruction limits the 16-bit signed integer elements stored in Rs1 into a signed + * integer range between 2imm4u-1 and -2imm4u, and writes the limited results to Rd. For example, if + * imm4u is 3, the 16-bit input values should be saturated between 7 and -8. If saturation is performed, + * set OV bit to 1. + * + * **Operations**:\n + * ~~~ + * src = Rs1.H[x]; + * if (src > (2^imm4u)-1) { + * src = (2^imm4u)-1; + * OV = 1; + * } else if (src < -2^imm4u) { + * src = -2^imm4u; + * OV = 1; + * } + * Rd.H[x] = src + * for RV32: x=1...0, + * for RV64: x=3...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +#define __RV_SCLIP16(a, b) \ + ({ \ + register unsigned long result; \ + register unsigned long __a = (unsigned long)(a); \ + __ASM volatile("sclip16 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ + result; \ + }) +/* ===== Inline Function End for 3.94. SCLIP16 ===== */ + +/* ===== Inline Function Start for 3.95. SCLIP32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC + * \brief SCLIP32 (SIMD 32-bit Signed Clip Value) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * SCLIP32 Rd, Rs1, imm5u[4:0] + * ~~~ + * + * **Purpose**:\n + * Limit the 32-bit signed integer elements of a register into a signed range simultaneously. + * + * **Description**:\n + * This instruction limits the 32-bit signed integer elements stored in Rs1 into a signed + * integer range between 2imm5u-1 and -2imm5u, and writes the limited results to Rd. For example, if + * imm5u is 3, the 32-bit input values should be saturated between 7 and -8. If saturation is performed, + * set OV bit to 1. + * + * **Operations**:\n + * ~~~ + * src = Rs1.W[x]; + * if (src > (2^imm5u)-1) { + * src = (2^imm5u)-1; + * OV = 1; + * } else if (src < -2^imm5u) { + * src = -2^imm5u; + * OV = 1; + * } + * Rd.W[x] = src + * for RV32: x=0, + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in long type + */ +#define __RV_SCLIP32(a, b) \ + ({ \ + register long result; \ + register long __a = (long)(a); \ + __ASM volatile("sclip32 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ + result; \ + }) +/* ===== Inline Function End for 3.95. SCLIP32 ===== */ + +/* ===== Inline Function Start for 3.96. SCMPLE8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP + * \brief SCMPLE8 (SIMD 8-bit Signed Compare Less Than & Equal) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SCMPLE8 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 8-bit signed integer elements less than & equal comparisons simultaneously. + * + * **Description**:\n + * This instruction compares the 8-bit signed integer elements in Rs1 with the 8-bit + * signed integer elements in Rs2 to see if the one in Rs1 is less than or equal to the one in Rs2. If it is + * true, the result is 0xFF; otherwise, the result is 0x0. The element comparison results are written to + * Rd + * + * **Operations**:\n + * ~~~ + * Rd.B[x] = (Rs1.B[x] {le} Rs2.B[x])? 0xff : 0x0; + * for RV32: x=3...0, + * for RV64: x=7...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SCMPLE8(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("scmple8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.96. SCMPLE8 ===== */ + +/* ===== Inline Function Start for 3.97. SCMPLE16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP + * \brief SCMPLE16 (SIMD 16-bit Signed Compare Less Than & Equal) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SCMPLE16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 16-bit signed integer elements less than & equal comparisons simultaneously. + * + * **Description**:\n + * This instruction compares the 16-bit signed integer elements in Rs1 with the 16-bit + * signed integer elements in Rs2 to see if the one in Rs1 is less than or equal to the one in Rs2. If it is + * true, the result is 0xFFFF; otherwise, the result is 0x0. The element comparison results are written + * to Rd. + * + * **Operations**:\n + * ~~~ + * Rd.H[x] = (Rs1.H[x] {le} Rs2.H[x])? 0xffff : 0x0; + * for RV32: x=1...0, + * for RV64: x=3...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SCMPLE16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("scmple16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.97. SCMPLE16 ===== */ + +/* ===== Inline Function Start for 3.98. SCMPLT8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP + * \brief SCMPLT8 (SIMD 8-bit Signed Compare Less Than) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SCMPLT8 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 8-bit signed integer elements less than comparisons simultaneously. + * + * **Description**:\n + * This instruction compares the 8-bit signed integer elements in Rs1 with the 8-bit + * signed integer elements in Rs2 to see if the one in Rs1 is less than the one in Rs2. If it is true, the + * result is 0xFF; otherwise, the result is 0x0. The element comparison results are written to Rd. + * + * **Operations**:\n + * ~~~ + * Rd.B[x] = (Rs1.B[x] < Rs2.B[x])? 0xff : 0x0; + * for RV32: x=3...0, + * for RV64: x=7...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SCMPLT8(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("scmplt8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.98. SCMPLT8 ===== */ + +/* ===== Inline Function Start for 3.99. SCMPLT16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP + * \brief SCMPLT16 (SIMD 16-bit Signed Compare Less Than) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SCMPLT16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 16-bit signed integer elements less than comparisons simultaneously. + * + * **Description**:\n + * This instruction compares the 16-bit signed integer elements in Rs1 with the two 16- + * bit signed integer elements in Rs2 to see if the one in Rs1 is less than the one in Rs2. If it is true, the + * result is 0xFFFF; otherwise, the result is 0x0. The element comparison results are written to Rd. + * + * **Operations**:\n + * ~~~ + * Rd.H[x] = (Rs1.H[x] < Rs2.H[x])? 0xffff : 0x0; + * for RV32: x=1...0, + * for RV64: x=3...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SCMPLT16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("scmplt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.99. SCMPLT16 ===== */ + +/* ===== Inline Function Start for 3.100. SLL8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT + * \brief SLL8 (SIMD 8-bit Shift Left Logical) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SLL8 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 8-bit elements logical left shift operations simultaneously. The shift amount is a + * variable from a GPR. + * + * **Description**:\n + * The 8-bit elements in Rs1 are left-shifted logically. And the results are written to Rd. + * The shifted out bits are filled with zero and the shift amount is specified by the low-order 3-bits of + * the value in the Rs2 register. + * + * **Operations**:\n + * ~~~ + * sa = Rs2[2:0]; + * Rd.B[x] = Rs1.B[x] << sa; + * for RV32: x=3...0, + * for RV64: x=7...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SLL8(unsigned long a, unsigned int b) +{ + register unsigned long result; + __ASM volatile("sll8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.100. SLL8 ===== */ + +/* ===== Inline Function Start for 3.101. SLLI8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT + * \brief SLLI8 (SIMD 8-bit Shift Left Logical Immediate) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SLLI8 Rd, Rs1, imm3u + * ~~~ + * + * **Purpose**:\n + * Do 8-bit elements logical left shift operations simultaneously. The shift amount is an + * immediate value. + * + * **Description**:\n + * The 8-bit elements in Rs1 are left-shifted logically. And the results are written to Rd. + * The shifted out bits are filled with zero and the shift amount is specified by the imm3u constant. + * + * **Operations**:\n + * ~~~ + * sa = imm3u[2:0]; + * Rd.B[x] = Rs1.B[x] << sa; + * for RV32: x=3...0, + * for RV64: x=7...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +#define __RV_SLLI8(a, b) \ + ({ \ + register unsigned long result; \ + register unsigned long __a = (unsigned long)(a); \ + __ASM volatile("slli8 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ + result; \ + }) +/* ===== Inline Function End for 3.101. SLLI8 ===== */ + +/* ===== Inline Function Start for 3.102. SLL16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT + * \brief SLL16 (SIMD 16-bit Shift Left Logical) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SLL16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 16-bit elements logical left shift operations simultaneously. The shift amount is a + * variable from a GPR. + * + * **Description**:\n + * The 16-bit elements in Rs1 are left-shifted logically. And the results are written to Rd. + * The shifted out bits are filled with zero and the shift amount is specified by the low-order 4-bits of + * the value in the Rs2 register. + * + * **Operations**:\n + * ~~~ + * sa = Rs2[3:0]; + * Rd.H[x] = Rs1.H[x] << sa; + * for RV32: x=1...0, + * for RV64: x=3...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SLL16(unsigned long a, unsigned int b) +{ + register unsigned long result; + __ASM volatile("sll16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.102. SLL16 ===== */ + +/* ===== Inline Function Start for 3.103. SLLI16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT + * \brief SLLI16 (SIMD 16-bit Shift Left Logical Immediate) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SLLI16 Rd, Rs1, imm4[3:0] + * ~~~ + * + * **Purpose**:\n + * Do 16-bit element logical left shift operations simultaneously. The shift amount is an + * immediate value. + * + * **Description**:\n + * The 16-bit elements in Rs1 are left-shifted logically. The shifted out bits are filled with + * zero and the shift amount is specified by the imm4[3:0] constant. And the results are written to Rd. + * + * **Operations**:\n + * ~~~ + * sa = imm4[3:0]; + * Rd.H[x] = Rs1.H[x] << sa; + * for RV32: x=1...0, + * for RV64: x=3...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +#define __RV_SLLI16(a, b) \ + ({ \ + register unsigned long result; \ + register unsigned long __a = (unsigned long)(a); \ + __ASM volatile("slli16 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ + result; \ + }) +/* ===== Inline Function End for 3.103. SLLI16 ===== */ + +/* ===== Inline Function Start for 3.104. SMAL ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB + * \brief SMAL (Signed Multiply Halfs & Add 64-bit) + * \details + * **Type**: Partial-SIMD + * + * **Syntax**:\n + * ~~~ + * SMAL Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the signed bottom 16-bit content of the 32-bit elements of a register with the top + * 16-bit content of the same 32-bit elements of the same register, and add the results with a 64-bit + * value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is written back + * to another even/odd pair of registers (RV32) or a register (RV64). + * + * **RV32 Description**:\n + * This instruction multiplies the bottom 16-bit content of the lower 32-bit of Rs2 with the top 16-bit + * content of the lower 32-bit of Rs2 and adds the result with the 64-bit value of an even/odd pair of + * registers specified by Rs1(4,1). The 64-bit addition result is written back to an even/odd pair of + * registers specified by Rd(4,1). The 16-bit values of Rs2, and the 64-bit value of the Rs1(4,1) register- + * pair are treated as signed integers. + * Rx(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair + * includes register 2d and 2d+1. + * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d` + * register of the pair contains the low 32-bit of the operand. + * + * **RV64 Description**:\n + * This instruction multiplies the bottom 16-bit content of the 32-bit elements of Rs2 with the top 16-bit + * content of the same 32-bit elements of Rs2 and adds the results with the 64-bit value of Rs1. The 64- + * bit addition result is written back to Rd. The 16-bit values of Rs2, and the 64-bit value of Rs1 are + * treated as signed integers. + * + * **Operations**:\n + * ~~~ + * RV32: + * Mres[31:0] = Rs2.H[1] * Rs2.H[0]; + * Idx0 = CONCAT(Rs1(4,1),1'b0); Idx1 = CONCAT(Rs1(4,1),1'b1); + + * Idx2 = CONCAT(Rd(4,1),1'b0); Idx3 = CONCAT(Rd(4,1),1'b1); + * R[Idx3].R[Idx2] = R[Idx1].R[Idx0] + SE64(Mres[31:0]); + * RV64: + * Mres[0][31:0] = Rs2.W[0].H[1] * Rs2.W[0].H[0]; + * Mres[1][31:0] = Rs2.W[1].H[1] * Rs2.W[1].H[0]; + * Rd = Rs1 + SE64(Mres[1][31:0]) + SE64(Mres[0][31:0]); + * ~~~ + * + * \param [in] a long long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long long type + */ +__STATIC_FORCEINLINE long long __RV_SMAL(long long a, unsigned long b) +{ + register long long result; + __ASM volatile("smal %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.104. SMAL ===== */ + +/* ===== Inline Function Start for 3.105.1. SMALBB ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB + * \brief SMALBB (Signed Multiply Bottom Halfs & Add 64-bit) + * \details + * **Type**: DSP (64-bit Profile) + * + * **Syntax**:\n + * ~~~ + * SMALBB Rd, Rs1, Rs2 + * SMALBT Rd, Rs1, Rs2 + * SMALTT Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the signed 16-bit content of the 32-bit elements of a register with the 16-bit + * content of the corresponding 32-bit elements of another register and add the results with a 64-bit + * value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is written back + * to the register-pair (RV32) or the register (RV64). + * * SMALBB: rt pair + bottom*bottom (all 32-bit elements) + * * SMALBT rt pair + bottom*top (all 32-bit elements) + * * SMALTT rt pair + top*top (all 32-bit elements) + * + * **RV32 Description**:\n + * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit + * content of Rs2. + * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit + * content of Rs2. + * For the `SMALTT` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content + * of Rs2. + * The multiplication result is added with the 64-bit value of an even/odd pair of registers specified by + * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and + * Rs2, and the 64-bit value of the register-pair are treated as signed integers. + * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair + * includes register 2d and 2d+1. + * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d` + * register of the pair contains the low 32-bit of the operand. + * + * **RV64 Description**:\n + * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 + * with the bottom 16-bit content of the 32-bit elements of Rs2. + * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 + * with the top 16-bit content of the 32-bit elements of Rs2. + * For the `SMALTT` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with + * the top 16-bit content of the 32-bit elements of Rs2. + * The multiplication results are added with the 64-bit value of Rd. The 64-bit addition result is written + * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed + * integers. + * + * **Operations**:\n + * ~~~ + * RV32: + * Mres[31:0] = Rs1.H[0] * Rs2.H[0]; // SMALBB + * Mres[31:0] = Rs1.H[0] * Rs2.H[1]; // SMALBT + * Mres[31:0] = Rs1.H[1] * Rs2.H[1]; // SMALTT + * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1); + * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]); + * RV64: + * // SMALBB + * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[0]; + * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[0]; + * // SMALBT + * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[1]; + * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[1]; + * // SMALTT + * Mres[0][31:0] = Rs1.W[0].H[1] * Rs2.W[0].H[1]; + * Mres[1][31:0] = Rs1.W[1].H[1] * Rs2.W[1].H[1]; + * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]); + * ~~~ + * + * \param [in] t long long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long long type + */ +__STATIC_FORCEINLINE long long __RV_SMALBB(long long t, unsigned long a, unsigned long b) +{ + __ASM volatile("smalbb %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.105.1. SMALBB ===== */ + +/* ===== Inline Function Start for 3.105.2. SMALBT ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB + * \brief SMALBT (Signed Multiply Bottom Half & Top Half & Add 64-bit) + * \details + * **Type**: DSP (64-bit Profile) + * + * **Syntax**:\n + * ~~~ + * SMALBB Rd, Rs1, Rs2 + * SMALBT Rd, Rs1, Rs2 + * SMALTT Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the signed 16-bit content of the 32-bit elements of a register with the 16-bit + * content of the corresponding 32-bit elements of another register and add the results with a 64-bit + * value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is written back + * to the register-pair (RV32) or the register (RV64). + * * SMALBB: rt pair + bottom*bottom (all 32-bit elements) + * * SMALBT rt pair + bottom*top (all 32-bit elements) + * * SMALTT rt pair + top*top (all 32-bit elements) + * + * **RV32 Description**:\n + * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit + * content of Rs2. + * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit + * content of Rs2. + * For the `SMALTT` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content + * of Rs2. + * The multiplication result is added with the 64-bit value of an even/odd pair of registers specified by + * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and + * Rs2, and the 64-bit value of the register-pair are treated as signed integers. + * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair + * includes register 2d and 2d+1. + * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d` + * register of the pair contains the low 32-bit of the operand. + * + * **RV64 Description**:\n + * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 + * with the bottom 16-bit content of the 32-bit elements of Rs2. + * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 + * with the top 16-bit content of the 32-bit elements of Rs2. + * For the `SMALTT` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with + * the top 16-bit content of the 32-bit elements of Rs2. + * The multiplication results are added with the 64-bit value of Rd. The 64-bit addition result is written + * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed + * integers. + * + * **Operations**:\n + * ~~~ + * RV32: + * Mres[31:0] = Rs1.H[0] * Rs2.H[0]; // SMALBB + * Mres[31:0] = Rs1.H[0] * Rs2.H[1]; // SMALBT + * Mres[31:0] = Rs1.H[1] * Rs2.H[1]; // SMALTT + * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1); + * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]); + * RV64: + * // SMALBB + * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[0]; + * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[0]; + * // SMALBT + * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[1]; + * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[1]; + * // SMALTT + * Mres[0][31:0] = Rs1.W[0].H[1] * Rs2.W[0].H[1]; + * Mres[1][31:0] = Rs1.W[1].H[1] * Rs2.W[1].H[1]; + * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]); + * ~~~ + * + * \param [in] t long long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long long type + */ +__STATIC_FORCEINLINE long long __RV_SMALBT(long long t, unsigned long a, unsigned long b) +{ + __ASM volatile("smalbt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.105.2. SMALBT ===== */ + +/* ===== Inline Function Start for 3.105.3. SMALTT ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB + * \brief SMALTT (Signed Multiply Top Halfs & Add 64-bit) + * \details + * **Type**: DSP (64-bit Profile) + * + * **Syntax**:\n + * ~~~ + * SMALBB Rd, Rs1, Rs2 + * SMALBT Rd, Rs1, Rs2 + * SMALTT Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the signed 16-bit content of the 32-bit elements of a register with the 16-bit + * content of the corresponding 32-bit elements of another register and add the results with a 64-bit + * value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is written back + * to the register-pair (RV32) or the register (RV64). + * * SMALBB: rt pair + bottom*bottom (all 32-bit elements) + * * SMALBT rt pair + bottom*top (all 32-bit elements) + * * SMALTT rt pair + top*top (all 32-bit elements) + * + * **RV32 Description**:\n + * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit + * content of Rs2. + * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit + * content of Rs2. + * For the `SMALTT` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content + * of Rs2. + * The multiplication result is added with the 64-bit value of an even/odd pair of registers specified by + * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and + * Rs2, and the 64-bit value of the register-pair are treated as signed integers. + * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair + * includes register 2d and 2d+1. + * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d` + * register of the pair contains the low 32-bit of the operand. + * + * **RV64 Description**:\n + * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 + * with the bottom 16-bit content of the 32-bit elements of Rs2. + * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 + * with the top 16-bit content of the 32-bit elements of Rs2. + * For the `SMALTT` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with + * the top 16-bit content of the 32-bit elements of Rs2. + * The multiplication results are added with the 64-bit value of Rd. The 64-bit addition result is written + * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed + * integers. + * + * **Operations**:\n + * ~~~ + * RV32: + * Mres[31:0] = Rs1.H[0] * Rs2.H[0]; // SMALBB + * Mres[31:0] = Rs1.H[0] * Rs2.H[1]; // SMALBT + * Mres[31:0] = Rs1.H[1] * Rs2.H[1]; // SMALTT + * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1); + * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]); + * RV64: + * // SMALBB + * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[0]; + * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[0]; + * // SMALBT + * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[1]; + * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[1]; + * // SMALTT + * Mres[0][31:0] = Rs1.W[0].H[1] * Rs2.W[0].H[1]; + * Mres[1][31:0] = Rs1.W[1].H[1] * Rs2.W[1].H[1]; + * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]); + * ~~~ + * + * \param [in] t long long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long long type + */ +__STATIC_FORCEINLINE long long __RV_SMALTT(long long t, unsigned long a, unsigned long b) +{ + __ASM volatile("smaltt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.105.3. SMALTT ===== */ + +/* ===== Inline Function Start for 3.106.1. SMALDA ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB + * \brief SMALDA (Signed Multiply Two Halfs and Two Adds 64-bit) + * \details + * **Type**: DSP (64-bit Profile) + * + * **Syntax**:\n + * ~~~ + * SMALDA Rd, Rs1, Rs2 + * SMALXDA Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then + * adds the two 32-bit results and the 64-bit value of an even/odd pair of registers together. + * * SMALDA: rt pair+ top*top + bottom*bottom (all 32-bit elements) + * * SMALXDA: rt pair+ top*bottom + bottom*top (all 32-bit elements) + * + * **RV32 Description**:\n + * For the `SMALDA` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit + * content of Rs2 and then adds the result to the result of multiplying the top 16-bit content of Rs1 with + * the top 16-bit content of Rs2 with unlimited precision. + * For the `SMALXDA` instruction, it multiplies the top 16-bit content of Rs1 with the bottom 16-bit + * content of Rs2 and then adds the result to the result of multiplying the bottom 16-bit content of Rs1 + * with the top 16-bit content of Rs2 with unlimited precision. + * The result is added to the 64-bit value of an even/odd pair of registers specified by Rd(4,1). The 64- + * bit addition result is written back to the register-pair. The 16-bit values of Rs1 and Rs2, and the 64- + * bit value of the register-pair are treated as signed integers. + * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair + * includes register 2d and 2d+1. + * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d` + * register of the pair contains the low 32-bit of the operand. + * + * **RV64 Description**:\n + * For the `SMALDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 + * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of + * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32- + * bit elements of Rs2 with unlimited precision. + * For the `SMALXDA` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 + * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of + * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the + * 32-bit elements of Rs2 with unlimited precision. + * The results are added to the 64-bit value of Rd. The 64-bit addition result is written back to Rd. The + * 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed integers. + * + * **Operations**:\n + * ~~~ + * RV32: + * // SMALDA + * Mres0[31:0] = (Rs1.H[0] * Rs2.H[0]); + * Mres1[31:0] = (Rs1.H[1] * Rs2.H[1]); + * // SMALXDA + * Mres0[31:0] = (Rs1.H[0] * Rs2.H[1]); + * Mres1[31:0] = (Rs1.H[1] * Rs2.H[0]); + * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1); + * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres0[31:0]) + SE64(Mres1[31:0]); + * RV64: + * // SMALDA + * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]); + * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]); + * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]); + * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]); + * // SMALXDA + * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[1]); + * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]); + * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[1]); + * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]); + * Rd = Rd + SE64(Mres0[0][31:0]) + SE64(Mres1[0][31:0]) + SE64(Mres0[1][31:0]) + + * SE64(Mres1[1][31:0]); + * ~~~ + * + * \param [in] t long long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long long type + */ +__STATIC_FORCEINLINE long long __RV_SMALDA(long long t, unsigned long a, unsigned long b) +{ + __ASM volatile("smalda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.106.1. SMALDA ===== */ + +/* ===== Inline Function Start for 3.106.2. SMALXDA ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB + * \brief SMALXDA (Signed Crossed Multiply Two Halfs and Two Adds 64-bit) + * \details + * **Type**: DSP (64-bit Profile) + * + * **Syntax**:\n + * ~~~ + * SMALDA Rd, Rs1, Rs2 + * SMALXDA Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then + * adds the two 32-bit results and the 64-bit value of an even/odd pair of registers together. + * * SMALDA: rt pair+ top*top + bottom*bottom (all 32-bit elements) + * * SMALXDA: rt pair+ top*bottom + bottom*top (all 32-bit elements) + * + * **RV32 Description**:\n + * For the `SMALDA` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit + * content of Rs2 and then adds the result to the result of multiplying the top 16-bit content of Rs1 with + * the top 16-bit content of Rs2 with unlimited precision. + * For the `SMALXDA` instruction, it multiplies the top 16-bit content of Rs1 with the bottom 16-bit + * content of Rs2 and then adds the result to the result of multiplying the bottom 16-bit content of Rs1 + * with the top 16-bit content of Rs2 with unlimited precision. + * The result is added to the 64-bit value of an even/odd pair of registers specified by Rd(4,1). The 64- + * bit addition result is written back to the register-pair. The 16-bit values of Rs1 and Rs2, and the 64- + * bit value of the register-pair are treated as signed integers. + * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair + * includes register 2d and 2d+1. + * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d` + * register of the pair contains the low 32-bit of the operand. + * + * **RV64 Description**:\n + * For the `SMALDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 + * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of + * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32- + * bit elements of Rs2 with unlimited precision. + * For the `SMALXDA` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 + * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of + * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the + * 32-bit elements of Rs2 with unlimited precision. + * The results are added to the 64-bit value of Rd. The 64-bit addition result is written back to Rd. The + * 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed integers. + * + * **Operations**:\n + * ~~~ + * RV32: + * // SMALDA + * Mres0[31:0] = (Rs1.H[0] * Rs2.H[0]); + * Mres1[31:0] = (Rs1.H[1] * Rs2.H[1]); + * // SMALXDA + * Mres0[31:0] = (Rs1.H[0] * Rs2.H[1]); + * Mres1[31:0] = (Rs1.H[1] * Rs2.H[0]); + * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1); + * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres0[31:0]) + SE64(Mres1[31:0]); + * RV64: + * // SMALDA + * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]); + * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]); + * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]); + * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]); + * // SMALXDA + * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[1]); + * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]); + * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[1]); + * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]); + * Rd = Rd + SE64(Mres0[0][31:0]) + SE64(Mres1[0][31:0]) + SE64(Mres0[1][31:0]) + + * SE64(Mres1[1][31:0]); + * ~~~ + * + * \param [in] t long long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long long type + */ +__STATIC_FORCEINLINE long long __RV_SMALXDA(long long t, unsigned long a, unsigned long b) +{ + __ASM volatile("smalxda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.106.2. SMALXDA ===== */ + +/* ===== Inline Function Start for 3.107.1. SMALDS ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB + * \brief SMALDS (Signed Multiply Two Halfs & Subtract & Add 64-bit) + * \details + * **Type**: DSP (64-bit Profile) + * + * **Syntax**:\n + * ~~~ + * SMALDS Rd, Rs1, Rs2 + * SMALDRS Rd, Rs1, Rs2 + * SMALXDS Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then + * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to + * the 64-bit value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is + * written back to the register-pair. + * * SMALDS: rt pair + (top*top - bottom*bottom) (all 32-bit elements) + * * SMALDRS: rt pair + (bottom*bottom - top*top) (all 32-bit elements) + * * SMALXDS: rt pair + (top*bottom - bottom*top) (all 32-bit elements) + * + * **RV32 Description**:\n + * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit + * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of + * Rs1 with the top 16-bit content of Rs2. + * For the `SMALDRS` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content + * of Rs2 and then subtracts the result from the result of multiplying the bottom 16-bit content of Rs1 + * with the bottom 16-bit content of Rs2. + * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit + * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of + * Rs1 with the bottom 16-bit content of Rs2. + * The subtraction result is then added to the 64-bit value of an even/odd pair of registers specified by + * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and + * Rs2, and the 64-bit value of the register-pair are treated as signed integers. + * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair + * includes register 2d and 2d+1. + * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d` + * register of the pair contains the low 32-bit of the operand. + * + * **RV64 Description**:\n + * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 + * with the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the + * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content + * of the 32-bit elements of Rs2. + * For the `SMALDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with + * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of + * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of + * the 32-bit elements of Rs2. + * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 + * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the + * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit + * content of the 32-bit elements of Rs2. + * The subtraction results are then added to the 64-bit value of Rd. The 64-bit addition result is written + * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed + * integers. + * + * **Operations**:\n + * ~~~ + * * RV32: + * Mres[31:0] = (Rs1.H[1] * Rs2.H[1]) - (Rs1.H[0] * Rs2.H[0]); // SMALDS + * Mres[31:0] = (Rs1.H[0] * Rs2.H[0]) - (Rs1.H[1] * Rs2.H[1]); // SMALDRS + * Mres[31:0] = (Rs1.H[1] * Rs2.H[0]) - (Rs1.H[0] * Rs2.H[1]); // SMALXDS + * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1); + * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]); + * * RV64: + * // SMALDS + * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]) - (Rs1.W[0].H[0] * Rs2.W[0].H[0]); + * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[0].H[1]) - (Rs1.W[1].H[0] * Rs2.W[1].H[0]); + * // SMALDRS + * Mres[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]) - (Rs1.W[0].H[1] * Rs2.W[0].H[1]); + * Mres[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[0].H[0]) - (Rs1.W[1].H[1] * Rs2.W[1].H[1]); + * // SMALXDS + * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]) - (Rs1.W[0].H[0] * Rs2.W[0].H[1]); + * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[0].H[0]) - (Rs1.W[1].H[0] * Rs2.W[1].H[1]); + * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]); + * ~~~ + * + * \param [in] t long long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long long type + */ +__STATIC_FORCEINLINE long long __RV_SMALDS(long long t, unsigned long a, unsigned long b) +{ + __ASM volatile("smalds %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.107.1. SMALDS ===== */ + +/* ===== Inline Function Start for 3.107.2. SMALDRS ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB + * \brief SMALDRS (Signed Multiply Two Halfs & Reverse Subtract & Add 64- bit) + * \details + * **Type**: DSP (64-bit Profile) + * + * **Syntax**:\n + * ~~~ + * SMALDS Rd, Rs1, Rs2 + * SMALDRS Rd, Rs1, Rs2 + * SMALXDS Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then + * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to + * the 64-bit value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is + * written back to the register-pair. + * * SMALDS: rt pair + (top*top - bottom*bottom) (all 32-bit elements) + * * SMALDRS: rt pair + (bottom*bottom - top*top) (all 32-bit elements) + * * SMALXDS: rt pair + (top*bottom - bottom*top) (all 32-bit elements) + * + * **RV32 Description**:\n + * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit + * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of + * Rs1 with the top 16-bit content of Rs2. + * For the `SMALDRS` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content + * of Rs2 and then subtracts the result from the result of multiplying the bottom 16-bit content of Rs1 + * with the bottom 16-bit content of Rs2. + * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit + * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of + * Rs1 with the bottom 16-bit content of Rs2. + * The subtraction result is then added to the 64-bit value of an even/odd pair of registers specified by + * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and + * Rs2, and the 64-bit value of the register-pair are treated as signed integers. + * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair + * includes register 2d and 2d+1. + * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d` + * register of the pair contains the low 32-bit of the operand. + * + * **RV64 Description**:\n + * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 + * with the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the + * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content + * of the 32-bit elements of Rs2. + * For the `SMALDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with + * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of + * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of + * the 32-bit elements of Rs2. + * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 + * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the + * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit + * content of the 32-bit elements of Rs2. + * The subtraction results are then added to the 64-bit value of Rd. The 64-bit addition result is written + * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed + * integers. + * + * **Operations**:\n + * ~~~ + * * RV32: + * Mres[31:0] = (Rs1.H[1] * Rs2.H[1]) - (Rs1.H[0] * Rs2.H[0]); // SMALDS + * Mres[31:0] = (Rs1.H[0] * Rs2.H[0]) - (Rs1.H[1] * Rs2.H[1]); // SMALDRS + * Mres[31:0] = (Rs1.H[1] * Rs2.H[0]) - (Rs1.H[0] * Rs2.H[1]); // SMALXDS + * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1); + * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]); + * * RV64: + * // SMALDS + * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]) - (Rs1.W[0].H[0] * Rs2.W[0].H[0]); + * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[0].H[1]) - (Rs1.W[1].H[0] * Rs2.W[1].H[0]); + * // SMALDRS + * Mres[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]) - (Rs1.W[0].H[1] * Rs2.W[0].H[1]); + * Mres[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[0].H[0]) - (Rs1.W[1].H[1] * Rs2.W[1].H[1]); + * // SMALXDS + * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]) - (Rs1.W[0].H[0] * Rs2.W[0].H[1]); + * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[0].H[0]) - (Rs1.W[1].H[0] * Rs2.W[1].H[1]); + * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]); + * ~~~ + * + * \param [in] t long long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long long type + */ +__STATIC_FORCEINLINE long long __RV_SMALDRS(long long t, unsigned long a, unsigned long b) +{ + __ASM volatile("smaldrs %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.107.2. SMALDRS ===== */ + +/* ===== Inline Function Start for 3.107.3. SMALXDS ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB + * \brief SMALXDS (Signed Crossed Multiply Two Halfs & Subtract & Add 64- bit) + * \details + * **Type**: DSP (64-bit Profile) + * + * **Syntax**:\n + * ~~~ + * SMALDS Rd, Rs1, Rs2 + * SMALDRS Rd, Rs1, Rs2 + * SMALXDS Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then + * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to + * the 64-bit value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is + * written back to the register-pair. + * * SMALDS: rt pair + (top*top - bottom*bottom) (all 32-bit elements) + * * SMALDRS: rt pair + (bottom*bottom - top*top) (all 32-bit elements) + * * SMALXDS: rt pair + (top*bottom - bottom*top) (all 32-bit elements) + * + * **RV32 Description**:\n + * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit + * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of + * Rs1 with the top 16-bit content of Rs2. + * For the `SMALDRS` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content + * of Rs2 and then subtracts the result from the result of multiplying the bottom 16-bit content of Rs1 + * with the bottom 16-bit content of Rs2. + * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit + * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of + * Rs1 with the bottom 16-bit content of Rs2. + * The subtraction result is then added to the 64-bit value of an even/odd pair of registers specified by + * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and + * Rs2, and the 64-bit value of the register-pair are treated as signed integers. + * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair + * includes register 2d and 2d+1. + * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d` + * register of the pair contains the low 32-bit of the operand. + * + * **RV64 Description**:\n + * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 + * with the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the + * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content + * of the 32-bit elements of Rs2. + * For the `SMALDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with + * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of + * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of + * the 32-bit elements of Rs2. + * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 + * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the + * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit + * content of the 32-bit elements of Rs2. + * The subtraction results are then added to the 64-bit value of Rd. The 64-bit addition result is written + * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed + * integers. + * + * **Operations**:\n + * ~~~ + * * RV32: + * Mres[31:0] = (Rs1.H[1] * Rs2.H[1]) - (Rs1.H[0] * Rs2.H[0]); // SMALDS + * Mres[31:0] = (Rs1.H[0] * Rs2.H[0]) - (Rs1.H[1] * Rs2.H[1]); // SMALDRS + * Mres[31:0] = (Rs1.H[1] * Rs2.H[0]) - (Rs1.H[0] * Rs2.H[1]); // SMALXDS + * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1); + * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]); + * * RV64: + * // SMALDS + * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]) - (Rs1.W[0].H[0] * Rs2.W[0].H[0]); + * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[0].H[1]) - (Rs1.W[1].H[0] * Rs2.W[1].H[0]); + * // SMALDRS + * Mres[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]) - (Rs1.W[0].H[1] * Rs2.W[0].H[1]); + * Mres[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[0].H[0]) - (Rs1.W[1].H[1] * Rs2.W[1].H[1]); + * // SMALXDS + * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]) - (Rs1.W[0].H[0] * Rs2.W[0].H[1]); + * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[0].H[0]) - (Rs1.W[1].H[0] * Rs2.W[1].H[1]); + * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]); + * ~~~ + * + * \param [in] t long long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long long type + */ +__STATIC_FORCEINLINE long long __RV_SMALXDS(long long t, unsigned long a, unsigned long b) +{ + __ASM volatile("smalxds %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.107.3. SMALXDS ===== */ + +/* ===== Inline Function Start for 3.108. SMAR64 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB + * \brief SMAR64 (Signed Multiply and Add to 64-Bit Data) + * \details + * **Type**: DSP (64-bit Profile) + * + * **Syntax**:\n + * ~~~ + * SMAR64 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the 32-bit signed elements in two registers and add the 64-bit multiplication + * result to the 64-bit signed data of a pair of registers (RV32) or a register (RV64). The result is written + * back to the pair of registers (RV32) or a register (RV64). + * + * **RV32 Description**:\n + * This instruction multiplies the 32-bit signed data of Rs1 with that of Rs2. It adds + * the 64-bit multiplication result to the 64-bit signed data of an even/odd pair of registers specified by + * Rd(4,1). The addition result is written back to the even/odd pair of registers specified by Rd(4,1). + * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair + * includes register 2d and 2d+1. + * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register + * of the pair contains the low 32-bit of the result. + * + * **RV64 Description**:\n + * This instruction multiplies the 32-bit signed elements of Rs1 with that of Rs2. It + * adds the 64-bit multiplication results to the 64-bit signed data of Rd. The addition result is written + * back to Rd. + * + * **Operations**:\n + * ~~~ + * * RV32: + * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1); + * R[t_H].R[t_L] = R[t_H].R[t_L] + (Rs1 * Rs2); + * * RV64: + * Rd = Rd + (Rs1.W[0] * Rs2.W[0]) + (Rs1.W[1] * Rs2.W[1]); + * ~~~ + * + * \param [in] t long long type of value stored in t + * \param [in] a long type of value stored in a + * \param [in] b long type of value stored in b + * \return value stored in long long type + */ +__STATIC_FORCEINLINE long long __RV_SMAR64(long long t, long a, long b) +{ + __ASM volatile("smar64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.108. SMAR64 ===== */ + +/* ===== Inline Function Start for 3.109. SMAQA ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_8B_MULT_32B_ADD + * \brief SMAQA (Signed Multiply Four Bytes with 32-bit Adds) + * \details + * **Type**: Partial-SIMD (Reduction) + * + * **Syntax**:\n + * ~~~ + * SMAQA Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do four signed 8-bit multiplications from 32-bit chunks of two registers; and then adds + * the four 16-bit results and the content of corresponding 32-bit chunks of a third register together. + * + * **Description**:\n + * This instruction multiplies the four signed 8-bit elements of 32-bit chunks of Rs1 with the four + * signed 8-bit elements of 32-bit chunks of Rs2 and then adds the four results together with the signed + * content of the corresponding 32-bit chunks of Rd. The final results are written back to the + * corresponding 32-bit chunks in Rd. + * + * **Operations**:\n + * ~~~ + * res[x] = Rd.W[x] + + * (Rs1.W[x].B[3] s* Rs2.W[x].B[3]) + (Rs1.W[x].B[2] s* Rs2.W[x].B[2]) + + * (Rs1.W[x].B[1] s* Rs2.W[x].B[1]) + (Rs1.W[x].B[0] s* Rs2.W[x].B[0]); + * Rd.W[x] = res[x]; + * for RV32: x=0, + * for RV64: x=1,0 + * ~~~ + * + * \param [in] t long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_SMAQA(long t, unsigned long a, unsigned long b) +{ + __ASM volatile("smaqa %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.109. SMAQA ===== */ + +/* ===== Inline Function Start for 3.110. SMAQA.SU ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_8B_MULT_32B_ADD + * \brief SMAQA.SU (Signed and Unsigned Multiply Four Bytes with 32-bit Adds) + * \details + * **Type**: Partial-SIMD (Reduction) + * + * **Syntax**:\n + * ~~~ + * SMAQA.SU Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do four `signed x unsigned` 8-bit multiplications from 32-bit chunks of two registers; and + * then adds the four 16-bit results and the content of corresponding 32-bit chunks of a third register + * together. + * + * **Description**:\n + * This instruction multiplies the four signed 8-bit elements of 32-bit chunks of Rs1 with the four + * unsigned 8-bit elements of 32-bit chunks of Rs2 and then adds the four results together with the + * signed content of the corresponding 32-bit chunks of Rd. The final results are written back to the + * corresponding 32-bit chunks in Rd. + * + * **Operations**:\n + * ~~~ + * res[x] = Rd.W[x] + + * (Rs1.W[x].B[3] su* Rs2.W[x].B[3]) + (Rs1.W[x].B[2] su* Rs2.W[x].B[2]) + + * (Rs1.W[x].B[1] su* Rs2.W[x].B[1]) + (Rs1.W[x].B[0] su* Rs2.W[x].B[0]); + * Rd.W[x] = res[x]; + * for RV32: x=0, + * for RV64: x=1...0 + * ~~~ + * + * \param [in] t long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_SMAQA_SU(long t, unsigned long a, unsigned long b) +{ + __ASM volatile("smaqa.su %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.110. SMAQA.SU ===== */ + +/* ===== Inline Function Start for 3.111. SMAX8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC + * \brief SMAX8 (SIMD 8-bit Signed Maximum) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SMAX8 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 8-bit signed integer elements finding maximum operations simultaneously. + * + * **Description**:\n + * This instruction compares the 8-bit signed integer elements in Rs1 with the 8-bit + * signed integer elements in Rs2 and selects the numbers that is greater than the other one. The + * selected results are written to Rd. + * + * **Operations**:\n + * ~~~ + * Rd.B[x] = (Rs1.B[x] > Rs2.B[x])? Rs1.B[x] : Rs2.B[x]; + * for RV32: x=3...0, + * for RV64: x=7...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SMAX8(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("smax8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.111. SMAX8 ===== */ + +/* ===== Inline Function Start for 3.112. SMAX16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC + * \brief SMAX16 (SIMD 16-bit Signed Maximum) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SMAX16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 16-bit signed integer elements finding maximum operations simultaneously. + * + * **Description**:\n + * This instruction compares the 16-bit signed integer elements in Rs1 with the 16-bit + * signed integer elements in Rs2 and selects the numbers that is greater than the other one. The + * selected results are written to Rd. + * + * **Operations**:\n + * ~~~ + * Rd.H[x] = (Rs1.H[x] > Rs2.H[x])? Rs1.H[x] : Rs2.H[x]; + * for RV32: x=1...0, + * for RV64: x=3...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SMAX16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("smax16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.112. SMAX16 ===== */ + +/* ===== Inline Function Start for 3.113.1. SMBB16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB + * \brief SMBB16 (SIMD Signed Multiply Bottom Half & Bottom Half) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SMBB16 Rd, Rs1, Rs2 + * SMBT16 Rd, Rs1, Rs2 + * SMTT16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the signed 16-bit content of the 32-bit elements of a register with the signed 16- + * bit content of the 32-bit elements of another register and write the result to a third register. + * * SMBB16: W[x].bottom*W[x].bottom + * * SMBT16: W[x].bottom *W[x].top + * * SMTT16: W[x].top * W[x].top + * + * **Description**:\n + * For the `SMBB16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 + * with the bottom 16-bit content of the 32-bit elements of Rs2. + * For the `SMBT16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 + * with the top 16-bit content of the 32-bit elements of Rs2. + * For the `SMTT16` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with + * the top 16-bit content of the 32-bit elements of Rs2. + * The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed + * integers. + * + * **Operations**:\n + * ~~~ + * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[0]; // SMBB16 + * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[1]; // SMBT16 + * Rd.W[x] = Rs1.W[x].H[1] * Rs2.W[x].H[1]; // SMTT16 + * for RV32: x=0, + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_SMBB16(unsigned long a, unsigned long b) +{ + register long result; + __ASM volatile("smbb16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.113.1. SMBB16 ===== */ + +/* ===== Inline Function Start for 3.113.2. SMBT16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB + * \brief SMBT16 (SIMD Signed Multiply Bottom Half & Top Half) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SMBB16 Rd, Rs1, Rs2 + * SMBT16 Rd, Rs1, Rs2 + * SMTT16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the signed 16-bit content of the 32-bit elements of a register with the signed 16- + * bit content of the 32-bit elements of another register and write the result to a third register. + * * SMBB16: W[x].bottom*W[x].bottom + * * SMBT16: W[x].bottom *W[x].top + * * SMTT16: W[x].top * W[x].top + * + * **Description**:\n + * For the `SMBB16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 + * with the bottom 16-bit content of the 32-bit elements of Rs2. + * For the `SMBT16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 + * with the top 16-bit content of the 32-bit elements of Rs2. + * For the `SMTT16` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with + * the top 16-bit content of the 32-bit elements of Rs2. + * The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed + * integers. + * + * **Operations**:\n + * ~~~ + * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[0]; // SMBB16 + * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[1]; // SMBT16 + * Rd.W[x] = Rs1.W[x].H[1] * Rs2.W[x].H[1]; // SMTT16 + * for RV32: x=0, + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_SMBT16(unsigned long a, unsigned long b) +{ + register long result; + __ASM volatile("smbt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.113.2. SMBT16 ===== */ + +/* ===== Inline Function Start for 3.113.3. SMTT16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB + * \brief SMTT16 (SIMD Signed Multiply Top Half & Top Half) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SMBB16 Rd, Rs1, Rs2 + * SMBT16 Rd, Rs1, Rs2 + * SMTT16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the signed 16-bit content of the 32-bit elements of a register with the signed 16- + * bit content of the 32-bit elements of another register and write the result to a third register. + * * SMBB16: W[x].bottom*W[x].bottom + * * SMBT16: W[x].bottom *W[x].top + * * SMTT16: W[x].top * W[x].top + * + * **Description**:\n + * For the `SMBB16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 + * with the bottom 16-bit content of the 32-bit elements of Rs2. + * For the `SMBT16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 + * with the top 16-bit content of the 32-bit elements of Rs2. + * For the `SMTT16` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with + * the top 16-bit content of the 32-bit elements of Rs2. + * The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed + * integers. + * + * **Operations**:\n + * ~~~ + * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[0]; // SMBB16 + * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[1]; // SMBT16 + * Rd.W[x] = Rs1.W[x].H[1] * Rs2.W[x].H[1]; // SMTT16 + * for RV32: x=0, + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_SMTT16(unsigned long a, unsigned long b) +{ + register long result; + __ASM volatile("smtt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.113.3. SMTT16 ===== */ + +/* ===== Inline Function Start for 3.114.1. SMDS ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB + * \brief SMDS (SIMD Signed Multiply Two Halfs and Subtract) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SMDS Rd, Rs1, Rs2 + * SMDRS Rd, Rs1, Rs2 + * SMXDS Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then + * perform a subtraction operation between the two 32-bit results. + * * SMDS: top*top - bottom*bottom (per 32-bit element) + * * SMDRS: bottom*bottom - top*top (per 32-bit element) + * * SMXDS: top*bottom - bottom*top (per 32-bit element) + * + * **Description**:\n + * For the `SMDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with + * the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result + * of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the + * 32-bit elements of Rs2. + * For the `SMDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with + * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of + * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of + * the 32-bit elements of Rs2. + * For the `SMXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 + * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the + * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit + * content of the 32-bit elements of Rs2. + * The subtraction result is written to the corresponding 32-bit element of Rd. The 16-bit contents of + * multiplication are treated as signed integers. + * + * **Operations**:\n + * ~~~ + * * SMDS: + * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]); + * * SMDRS: + * Rd.W[x] = (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]); + * * SMXDS: + * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]); + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_SMDS(unsigned long a, unsigned long b) +{ + register long result; + __ASM volatile("smds %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.114.1. SMDS ===== */ + +/* ===== Inline Function Start for 3.114.2. SMDRS ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB + * \brief SMDRS (SIMD Signed Multiply Two Halfs and Reverse Subtract) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SMDS Rd, Rs1, Rs2 + * SMDRS Rd, Rs1, Rs2 + * SMXDS Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then + * perform a subtraction operation between the two 32-bit results. + * * SMDS: top*top - bottom*bottom (per 32-bit element) + * * SMDRS: bottom*bottom - top*top (per 32-bit element) + * * SMXDS: top*bottom - bottom*top (per 32-bit element) + * + * **Description**:\n + * For the `SMDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with + * the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result + * of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the + * 32-bit elements of Rs2. + * For the `SMDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with + * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of + * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of + * the 32-bit elements of Rs2. + * For the `SMXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 + * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the + * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit + * content of the 32-bit elements of Rs2. + * The subtraction result is written to the corresponding 32-bit element of Rd. The 16-bit contents of + * multiplication are treated as signed integers. + * + * **Operations**:\n + * ~~~ + * * SMDS: + * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]); + * * SMDRS: + * Rd.W[x] = (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]); + * * SMXDS: + * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]); + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_SMDRS(unsigned long a, unsigned long b) +{ + register long result; + __ASM volatile("smdrs %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.114.2. SMDRS ===== */ + +/* ===== Inline Function Start for 3.114.3. SMXDS ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB + * \brief SMXDS (SIMD Signed Crossed Multiply Two Halfs and Subtract) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SMDS Rd, Rs1, Rs2 + * SMDRS Rd, Rs1, Rs2 + * SMXDS Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then + * perform a subtraction operation between the two 32-bit results. + * * SMDS: top*top - bottom*bottom (per 32-bit element) + * * SMDRS: bottom*bottom - top*top (per 32-bit element) + * * SMXDS: top*bottom - bottom*top (per 32-bit element) + * + * **Description**:\n + * For the `SMDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with + * the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result + * of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the + * 32-bit elements of Rs2. + * For the `SMDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with + * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of + * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of + * the 32-bit elements of Rs2. + * For the `SMXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 + * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the + * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit + * content of the 32-bit elements of Rs2. + * The subtraction result is written to the corresponding 32-bit element of Rd. The 16-bit contents of + * multiplication are treated as signed integers. + * + * **Operations**:\n + * ~~~ + * * SMDS: + * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]); + * * SMDRS: + * Rd.W[x] = (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]); + * * SMXDS: + * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]); + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_SMXDS(unsigned long a, unsigned long b) +{ + register long result; + __ASM volatile("smxds %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.114.3. SMXDS ===== */ + +/* ===== Inline Function Start for 3.115. SMIN8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC + * \brief SMIN8 (SIMD 8-bit Signed Minimum) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SMIN8 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 8-bit signed integer elements finding minimum operations simultaneously. + * + * **Description**:\n + * This instruction compares the 8-bit signed integer elements in Rs1 with the 8-bit + * signed integer elements in Rs2 and selects the numbers that is less than the other one. The selected + * results are written to Rd. + * + * **Operations**:\n + * ~~~ + * Rd.B[x] = (Rs1.B[x] < Rs2.B[x])? Rs1.B[x] : Rs2.B[x]; + * for RV32: x=3...0, + * for RV64: x=7...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SMIN8(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("smin8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.115. SMIN8 ===== */ + +/* ===== Inline Function Start for 3.116. SMIN16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC + * \brief SMIN16 (SIMD 16-bit Signed Minimum) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SMIN16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 16-bit signed integer elements finding minimum operations simultaneously. + * + * **Description**:\n + * This instruction compares the 16-bit signed integer elements in Rs1 with the 16-bit + * signed integer elements in Rs2 and selects the numbers that is less than the other one. The selected + * results are written to Rd. + * + * **Operations**:\n + * ~~~ + * Rd.H[x] = (Rs1.H[x] < Rs2.H[x])? Rs1.H[x] : Rs2.H[x]; + * for RV32: x=1...0, + * for RV64: x=3...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SMIN16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("smin16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.116. SMIN16 ===== */ + +/* ===== Inline Function Start for 3.117.1. SMMUL ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC + * \brief SMMUL (SIMD MSW Signed Multiply Word) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SMMUL Rd, Rs1, Rs2 + * SMMUL.u Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the 32-bit signed integer elements of two registers and write the most significant + * 32-bit results to the corresponding 32-bit elements of a register. The `.u` form performs an + * additional rounding up operation on the multiplication results before taking the most significant + * 32-bit part of the results. + * + * **Description**:\n + * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2 and writes the + * most significant 32-bit multiplication results to the corresponding 32-bit elements of Rd. The 32-bit + * elements of Rs1 and Rs2 are treated as signed integers. The `.u` form of the instruction rounds up + * the most significant 32-bit of the 64-bit multiplication results by adding a 1 to bit 31 of the results. + * * For `smmul/RV32` instruction, it is an alias to `mulh/RV32` instruction. + * + * **Operations**:\n + * ~~~ + * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x]; + * if (`.u` form) { + * Round[x][32:0] = Mres[x][63:31] + 1; + * Rd.W[x] = Round[x][32:1]; + * } else { + * Rd.W[x] = Mres[x][63:32]; + * } + * for RV32: x=0 + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a long type of value stored in a + * \param [in] b long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_SMMUL(long a, long b) +{ + register long result; + __ASM volatile("smmul %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.117.1. SMMUL ===== */ + +/* ===== Inline Function Start for 3.117.2. SMMUL.u ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC + * \brief SMMUL.u (SIMD MSW Signed Multiply Word with Rounding) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SMMUL Rd, Rs1, Rs2 + * SMMUL.u Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the 32-bit signed integer elements of two registers and write the most significant + * 32-bit results to the corresponding 32-bit elements of a register. The `.u` form performs an + * additional rounding up operation on the multiplication results before taking the most significant + * 32-bit part of the results. + * + * **Description**:\n + * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2 and writes the + * most significant 32-bit multiplication results to the corresponding 32-bit elements of Rd. The 32-bit + * elements of Rs1 and Rs2 are treated as signed integers. The `.u` form of the instruction rounds up + * the most significant 32-bit of the 64-bit multiplication results by adding a 1 to bit 31 of the results. + * * For `smmul/RV32` instruction, it is an alias to `mulh/RV32` instruction. + * + * **Operations**:\n + * ~~~ + * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x]; + * if (`.u` form) { + * Round[x][32:0] = Mres[x][63:31] + 1; + * Rd.W[x] = Round[x][32:1]; + * } else { + * Rd.W[x] = Mres[x][63:32]; + * } + * for RV32: x=0 + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a long type of value stored in a + * \param [in] b long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_SMMUL_U(long a, long b) +{ + register long result; + __ASM volatile("smmul.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.117.2. SMMUL.u ===== */ + +/* ===== Inline Function Start for 3.118.1. SMMWB ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC + * \brief SMMWB (SIMD MSW Signed Multiply Word and Bottom Half) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SMMWB Rd, Rs1, Rs2 + * SMMWB.u Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the + * corresponding 32-bit elements of another register, and write the most significant 32-bit results to + * the corresponding 32-bit elements of a register. The `.u` form rounds up the results from the most + * significant discarded bit. + * + * **Description**:\n + * This instruction multiplies the signed 32-bit elements of Rs1 with the signed bottom 16-bit content + * of the corresponding 32-bit elements of Rs2 and writes the most significant 32-bit multiplication + * results to the corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the + * most significant 32-bit of the 48-bit multiplication results by adding a 1 to bit 15 of the results. + * + * **Operations**:\n + * ~~~ + * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[0]; + * if (`.u` form) { + * Round[x][32:0] = Mres[x][47:15] + 1; + * Rd.W[x] = Round[x][32:1]; + * } else { + * Rd.W[x] = Mres[x][47:16]; + * } + * for RV32: x=0 + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_SMMWB(long a, unsigned long b) +{ + register long result; + __ASM volatile("smmwb %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.118.1. SMMWB ===== */ + +/* ===== Inline Function Start for 3.118.2. SMMWB.u ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC + * \brief SMMWB.u (SIMD MSW Signed Multiply Word and Bottom Half with Rounding) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SMMWB Rd, Rs1, Rs2 + * SMMWB.u Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the + * corresponding 32-bit elements of another register, and write the most significant 32-bit results to + * the corresponding 32-bit elements of a register. The `.u` form rounds up the results from the most + * significant discarded bit. + * + * **Description**:\n + * This instruction multiplies the signed 32-bit elements of Rs1 with the signed bottom 16-bit content + * of the corresponding 32-bit elements of Rs2 and writes the most significant 32-bit multiplication + * results to the corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the + * most significant 32-bit of the 48-bit multiplication results by adding a 1 to bit 15 of the results. + * + * **Operations**:\n + * ~~~ + * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[0]; + * if (`.u` form) { + * Round[x][32:0] = Mres[x][47:15] + 1; + * Rd.W[x] = Round[x][32:1]; + * } else { + * Rd.W[x] = Mres[x][47:16]; + * } + * for RV32: x=0 + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_SMMWB_U(long a, unsigned long b) +{ + register long result; + __ASM volatile("smmwb.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.118.2. SMMWB.u ===== */ + +/* ===== Inline Function Start for 3.119.1. SMMWT ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC + * \brief SMMWT (SIMD MSW Signed Multiply Word and Top Half) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SMMWT Rd, Rs1, Rs2 + * SMMWT.u Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the signed 32-bit integer elements of one register and the top 16-bit of the + * corresponding 32-bit elements of another register, and write the most significant 32-bit results to + * the corresponding 32-bit elements of a register. The `.u` form rounds up the results from the most + * significant discarded bit. + * + * **Description**:\n + * This instruction multiplies the signed 32-bit elements of Rs1 with the top signed 16-bit content of + * the corresponding 32-bit elements of Rs2 and writes the most significant 32-bit multiplication + * results to the corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the + * most significant 32-bit of the 48-bit multiplication results by adding a 1 to bit 15 of the results. + * + * **Operations**:\n + * ~~~ + * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[1]; + * if (`.u` form) { + * Round[x][32:0] = Mres[x][47:15] + 1; + * Rd.W[x] = Round[x][32:1]; + * } else { + * Rd.W[x] = Mres[x][47:16]; + * } + * for RV32: x=0 + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_SMMWT(long a, unsigned long b) +{ + register long result; + __ASM volatile("smmwt %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.119.1. SMMWT ===== */ + +/* ===== Inline Function Start for 3.119.2. SMMWT.u ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC + * \brief SMMWT.u (SIMD MSW Signed Multiply Word and Top Half with Rounding) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SMMWT Rd, Rs1, Rs2 + * SMMWT.u Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the signed 32-bit integer elements of one register and the top 16-bit of the + * corresponding 32-bit elements of another register, and write the most significant 32-bit results to + * the corresponding 32-bit elements of a register. The `.u` form rounds up the results from the most + * significant discarded bit. + * + * **Description**:\n + * This instruction multiplies the signed 32-bit elements of Rs1 with the top signed 16-bit content of + * the corresponding 32-bit elements of Rs2 and writes the most significant 32-bit multiplication + * results to the corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the + * most significant 32-bit of the 48-bit multiplication results by adding a 1 to bit 15 of the results. + * + * **Operations**:\n + * ~~~ + * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[1]; + * if (`.u` form) { + * Round[x][32:0] = Mres[x][47:15] + 1; + * Rd.W[x] = Round[x][32:1]; + * } else { + * Rd.W[x] = Mres[x][47:16]; + * } + * for RV32: x=0 + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_SMMWT_U(long a, unsigned long b) +{ + register long result; + __ASM volatile("smmwt.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.119.2. SMMWT.u ===== */ + +/* ===== Inline Function Start for 3.120.1. SMSLDA ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB + * \brief SMSLDA (Signed Multiply Two Halfs & Add & Subtract 64-bit) + * \details + * **Type**: DSP (64-bit Profile) + * + * **Syntax**:\n + * ~~~ + * SMSLDA Rd, Rs1, Rs2 + * SMSLXDA Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then + * subtracts the two 32-bit results from the 64-bit value of an even/odd pair of registers (RV32) or a + * register (RV64). The subtraction result is written back to the register-pair. + * * SMSLDA: rd pair - top*top - bottom*bottom (all 32-bit elements) + * * SMSLXDA: rd pair - top*bottom - bottom*top (all 32-bit elements) + * + * **RV32 Description**:\n + * For the `SMSLDA` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit + * content Rs2 and multiplies the top 16-bit content of Rs1 with the top 16-bit content of Rs2. + * For the `SMSLXDA` instruction, it multiplies the top 16-bit content of Rs1 with the bottom 16-bit + * content of Rs2 and multiplies the bottom 16-bit content of Rs1 with the top 16-bit content of Rs2. + * The two multiplication results are subtracted from the 64-bit value of an even/odd pair of registers + * specified by Rd(4,1). The 64-bit subtraction result is written back to the register-pair. The 16-bit + * values of Rs1 and Rs2, and the 64-bit value of the register-pair are treated as signed integers. + * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair + * includes register 2d and 2d+1. + * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register + * of the pair contains the low 32-bit of the result. + * + * **RV64 Description**:\n + * For the `SMSLDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 + * with the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of + * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2. + * For the `SMSLXDA` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with + * the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the bottom 16-bit content of + * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2. + * The four multiplication results are subtracted from the 64-bit value of Rd. The 64-bit subtraction + * result is written back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated + * as signed integers. + * + * **Operations**:\n + * ~~~ + * * RV32: + * // SMSLDA + * Mres0[31:0] = (Rs1.H[0] * Rs2.H[0]); + * Mres1[31:0] = (Rs1.H[1] * Rs2.H[1]); + * // SMSLXDA + * Mres0[31:0] = (Rs1.H[0] * Rs2.H[1]); + * Mres1[31:0] = (Rs1.H[1] * Rs2.H[0]); + * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1); + * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] - SE64(Mres0[31:0]) - SE64(Mres1[31:0]); + * * RV64: + * // SMSLDA + * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]); + * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]); + * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]); + * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]); + * // SMSLXDA + * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[1]); + * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]); + * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[1]); + * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]); + * Rd = Rd - SE64(Mres0[0][31:0]) - SE64(Mres1[0][31:0]) - SE64(Mres0[1][31:0]) - + * SE64(Mres1[1][31:0]); + * ~~~ + * + * \param [in] t long long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long long type + */ +__STATIC_FORCEINLINE long long __RV_SMSLDA(long long t, unsigned long a, unsigned long b) +{ + __ASM volatile("smslda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.120.1. SMSLDA ===== */ + +/* ===== Inline Function Start for 3.120.2. SMSLXDA ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB + * \brief SMSLXDA (Signed Crossed Multiply Two Halfs & Add & Subtract 64- bit) + * \details + * **Type**: DSP (64-bit Profile) + * + * **Syntax**:\n + * ~~~ + * SMSLDA Rd, Rs1, Rs2 + * SMSLXDA Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then + * subtracts the two 32-bit results from the 64-bit value of an even/odd pair of registers (RV32) or a + * register (RV64). The subtraction result is written back to the register-pair. + * * SMSLDA: rd pair - top*top - bottom*bottom (all 32-bit elements) + * * SMSLXDA: rd pair - top*bottom - bottom*top (all 32-bit elements) + * + * **RV32 Description**:\n + * For the `SMSLDA` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit + * content Rs2 and multiplies the top 16-bit content of Rs1 with the top 16-bit content of Rs2. + * For the `SMSLXDA` instruction, it multiplies the top 16-bit content of Rs1 with the bottom 16-bit + * content of Rs2 and multiplies the bottom 16-bit content of Rs1 with the top 16-bit content of Rs2. + * The two multiplication results are subtracted from the 64-bit value of an even/odd pair of registers + * specified by Rd(4,1). The 64-bit subtraction result is written back to the register-pair. The 16-bit + * values of Rs1 and Rs2, and the 64-bit value of the register-pair are treated as signed integers. + * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair + * includes register 2d and 2d+1. + * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register + * of the pair contains the low 32-bit of the result. + * + * **RV64 Description**:\n + * For the `SMSLDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 + * with the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of + * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2. + * For the `SMSLXDA` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with + * the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the bottom 16-bit content of + * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2. + * The four multiplication results are subtracted from the 64-bit value of Rd. The 64-bit subtraction + * result is written back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated + * as signed integers. + * + * **Operations**:\n + * ~~~ + * * RV32: + * // SMSLDA + * Mres0[31:0] = (Rs1.H[0] * Rs2.H[0]); + * Mres1[31:0] = (Rs1.H[1] * Rs2.H[1]); + * // SMSLXDA + * Mres0[31:0] = (Rs1.H[0] * Rs2.H[1]); + * Mres1[31:0] = (Rs1.H[1] * Rs2.H[0]); + * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1); + * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] - SE64(Mres0[31:0]) - SE64(Mres1[31:0]); + * * RV64: + * // SMSLDA + * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]); + * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]); + * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]); + * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]); + * // SMSLXDA + * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[1]); + * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]); + * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[1]); + * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]); + * Rd = Rd - SE64(Mres0[0][31:0]) - SE64(Mres1[0][31:0]) - SE64(Mres0[1][31:0]) - + * SE64(Mres1[1][31:0]); + * ~~~ + * + * \param [in] t long long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long long type + */ +__STATIC_FORCEINLINE long long __RV_SMSLXDA(long long t, unsigned long a, unsigned long b) +{ + __ASM volatile("smslxda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.120.2. SMSLXDA ===== */ + +/* ===== Inline Function Start for 3.121. SMSR64 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB + * \brief SMSR64 (Signed Multiply and Subtract from 64- Bit Data) + * \details + * **Type**: DSP (64-bit Profile) + * + * **Syntax**:\n + * ~~~ + * SMSR64 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the 32-bit signed elements in two registers and subtract the 64-bit multiplication + * results from the 64-bit signed data of a pair of registers (RV32) or a register (RV64). The result is + * written back to the pair of registers (RV32) or a register (RV64). + * + * **RV32 Description**:\n + * This instruction multiplies the 32-bit signed data of Rs1 with that of Rs2. It + * subtracts the 64-bit multiplication result from the 64-bit signed data of an even/odd pair of registers + * specified by Rd(4,1). The subtraction result is written back to the even/odd pair of registers + * specified by Rd(4,1). + * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair + * includes register 2d and 2d+1. + * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register + * of the pair contains the low 32-bit of the result. + * + * **RV64 Description**:\n + * This instruction multiplies the 32-bit signed elements of Rs1 with that of Rs2. It + * subtracts the 64-bit multiplication results from the 64-bit signed data of Rd. The subtraction result is + * written back to Rd. + * + * **Operations**:\n + * ~~~ + * * RV32: + * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1); + * R[t_H].R[t_L] = R[t_H].R[t_L] - (Rs1 * Rs2); + * * RV64: + * Rd = Rd - (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); + * ~~~ + * + * \param [in] t long long type of value stored in t + * \param [in] a long type of value stored in a + * \param [in] b long type of value stored in b + * \return value stored in long long type + */ +__STATIC_FORCEINLINE long long __RV_SMSR64(long long t, long a, long b) +{ + __ASM volatile("smsr64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.121. SMSR64 ===== */ + +/* ===== Inline Function Start for 3.122.1. SMUL8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY + * \brief SMUL8 (SIMD Signed 8-bit Multiply) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SMUL8 Rd, Rs1, Rs2 + * SMULX8 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do signed 8-bit multiplications and generate four 16-bit results simultaneously. + * + * **RV32 Description**:\n + * For the `SMUL8` instruction, multiply the 8-bit data elements of Rs1 with the + * corresponding 8-bit data elements of Rs2. + * For the `SMULX8` instruction, multiply the first and second 8-bit data elements of Rs1 with the + * second and first 8-bit data elements of Rs2. At the same time, multiply the third and fourth 8-bit data + * elements of Rs1 with the fourth and third 8-bit data elements of Rs2. + * The four 16-bit results are then written into an even/odd pair of registers specified by Rd(4,1). + * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair + * includes register 2d and 2d+1. + * The odd `2d+1` register of the pair contains the two 16-bit results calculated from the top part of + * Rs1 and the even `2d` register of the pair contains the two 16-bit results calculated from the bottom + * part of Rs1. + * + * **RV64 Description**:\n + * For the `SMUL8` instruction, multiply the 8-bit data elements of Rs1 with the + * corresponding 8-bit data elements of Rs2. + * For the `SMULX8` instruction, multiply the first and second 8-bit data elements of Rs1 with the + * second and first 8-bit data elements of Rs2. At the same time, multiply the third and fourth 8-bit data + * elements of Rs1 with the fourth and third 8-bit data elements of Rs2. + * The four 16-bit results are then written into Rd. The Rd.W[1] contains the two 16-bit results + * calculated from the top part of Rs1 and the Rd.W[0] contains the two 16-bit results calculated from + * the bottom part of Rs1. + * + * **Operations**:\n + * ~~~ + * * RV32: + * if (is `SMUL8`) { + * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top + * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom + * } else if (is `SMULX8`) { + * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top + * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom + * } + * rest[x/2] = op1t[x/2] s* op2t[x/2]; + * resb[x/2] = op1b[x/2] s* op2b[x/2]; + * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1); + * R[t_H].H[1] = rest[1]; R[t_H].H[0] = resb[1]; + * R[t_L].H[1] = rest[0]; R[t_L].H[0] = resb[0]; + * x = 0 and 2 + * * RV64: + * if (is `SMUL8`) { + * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top + * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom + * } else if (is `SMULX8`) { + * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top + * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom + * } + * rest[x/2] = op1t[x/2] s* op2t[x/2]; + * resb[x/2] = op1b[x/2] s* op2b[x/2]; + * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1); + * Rd.W[1].H[1] = rest[1]; Rd.W[1].H[0] = resb[1]; + * Rd.W[0].H[1] = rest[0]; Rd.W[0].H[0] = resb[0]; + * x = 0 and 2 + * ~~~ + * + * \param [in] a unsigned int type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long long type + */ +__STATIC_FORCEINLINE unsigned long long __RV_SMUL8(unsigned int a, unsigned int b) +{ + register unsigned long long result; + __ASM volatile("smul8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.122.1. SMUL8 ===== */ + +/* ===== Inline Function Start for 3.122.2. SMULX8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY + * \brief SMULX8 (SIMD Signed Crossed 8-bit Multiply) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SMUL8 Rd, Rs1, Rs2 + * SMULX8 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do signed 8-bit multiplications and generate four 16-bit results simultaneously. + * + * **RV32 Description**:\n + * For the `SMUL8` instruction, multiply the 8-bit data elements of Rs1 with the + * corresponding 8-bit data elements of Rs2. + * For the `SMULX8` instruction, multiply the first and second 8-bit data elements of Rs1 with the + * second and first 8-bit data elements of Rs2. At the same time, multiply the third and fourth 8-bit data + * elements of Rs1 with the fourth and third 8-bit data elements of Rs2. + * The four 16-bit results are then written into an even/odd pair of registers specified by Rd(4,1). + * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair + * includes register 2d and 2d+1. + * The odd `2d+1` register of the pair contains the two 16-bit results calculated from the top part of + * Rs1 and the even `2d` register of the pair contains the two 16-bit results calculated from the bottom + * part of Rs1. + * + * **RV64 Description**:\n + * For the `SMUL8` instruction, multiply the 8-bit data elements of Rs1 with the + * corresponding 8-bit data elements of Rs2. + * For the `SMULX8` instruction, multiply the first and second 8-bit data elements of Rs1 with the + * second and first 8-bit data elements of Rs2. At the same time, multiply the third and fourth 8-bit data + * elements of Rs1 with the fourth and third 8-bit data elements of Rs2. + * The four 16-bit results are then written into Rd. The Rd.W[1] contains the two 16-bit results + * calculated from the top part of Rs1 and the Rd.W[0] contains the two 16-bit results calculated from + * the bottom part of Rs1. + * + * **Operations**:\n + * ~~~ + * * RV32: + * if (is `SMUL8`) { + * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top + * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom + * } else if (is `SMULX8`) { + * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top + * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom + * } + * rest[x/2] = op1t[x/2] s* op2t[x/2]; + * resb[x/2] = op1b[x/2] s* op2b[x/2]; + * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1); + * R[t_H].H[1] = rest[1]; R[t_H].H[0] = resb[1]; + * R[t_L].H[1] = rest[0]; R[t_L].H[0] = resb[0]; + * x = 0 and 2 + * * RV64: + * if (is `SMUL8`) { + * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top + * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom + * } else if (is `SMULX8`) { + * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top + * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom + * } + * rest[x/2] = op1t[x/2] s* op2t[x/2]; + * resb[x/2] = op1b[x/2] s* op2b[x/2]; + * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1); + * Rd.W[1].H[1] = rest[1]; Rd.W[1].H[0] = resb[1]; + * Rd.W[0].H[1] = rest[0]; Rd.W[0].H[0] = resb[0]; + * x = 0 and 2 + * ~~~ + * + * \param [in] a unsigned int type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long long type + */ +__STATIC_FORCEINLINE unsigned long long __RV_SMULX8(unsigned int a, unsigned int b) +{ + register unsigned long long result; + __ASM volatile("smulx8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.122.2. SMULX8 ===== */ + +/* ===== Inline Function Start for 3.123.1. SMUL16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY + * \brief SMUL16 (SIMD Signed 16-bit Multiply) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SMUL16 Rd, Rs1, Rs2 + * SMULX16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do signed 16-bit multiplications and generate two 32-bit results simultaneously. + * + * **RV32 Description**:\n + * For the `SMUL16` instruction, multiply the top 16-bit Q15 content of Rs1 with + * the top 16-bit Q15 content of Rs2. At the same time, multiply the bottom 16-bit Q15 content of Rs1 + * with the bottom 16-bit Q15 content of Rs2. + * For the `SMULX16` instruction, multiply the top 16-bit Q15 content of Rs1 with the bottom 16-bit + * Q15 content of Rs2. At the same time, multiply the bottom 16-bit Q15 content of Rs1 with the top 16- + * bit Q15 content of Rs2. + * The two Q30 results are then written into an even/odd pair of registers specified by Rd(4,1). Rd(4,1), + * i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair includes + * register 2d and 2d+1. + * The odd `2d+1` register of the pair contains the 32-bit result calculated from the top part of Rs1 and + * the even `2d` register of the pair contains the 32-bit result calculated from the bottom part of Rs1. + * + * **RV64 Description**:\n + * For the `SMUL16` instruction, multiply the top 16-bit Q15 content of the lower + * 32-bit word in Rs1 with the top 16-bit Q15 content of the lower 32-bit word in Rs2. At the same time, + * multiply the bottom 16-bit Q15 content of the lower 32-bit word in Rs1 with the bottom 16-bit Q15 + * content of the lower 32-bit word in Rs2. + * For the `SMULX16` instruction, multiply the top 16-bit Q15 content of the lower 32-bit word in Rs1 + * with the bottom 16-bit Q15 content of the lower 32-bit word in Rs2. At the same time, multiply the + * bottom 16-bit Q15 content of the lower 32-bit word in Rs1 with the top 16-bit Q15 content of the + * lower 32-bit word in Rs2. + * The two 32-bit Q30 results are then written into Rd. The result calculated from the top 16-bit of the + * lower 32-bit word in Rs1 is written to Rd.W[1]. And the result calculated from the bottom 16-bit of + * the lower 32-bit word in Rs1 is written to Rd.W[0] + * + * **Operations**:\n + * ~~~ + * * RV32: + * if (is `SMUL16`) { + * op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top + * op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom + * } else if (is `SMULX16`) { + * op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top + * op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom + * } + * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) { + * res = aop s* bop; + * } + * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1); + * R[t_H] = rest; + * R[t_L] = resb; + * * RV64: + * if (is `SMUL16`) { + * op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top + * op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom + * } else if (is `SMULX16`) { + * op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top + * op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom + * } + * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) { + * res = aop s* bop; + * } + * Rd.W[1] = rest; + * Rd.W[0] = resb; + * ~~~ + * + * \param [in] a unsigned int type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long long type + */ +__STATIC_FORCEINLINE unsigned long long __RV_SMUL16(unsigned int a, unsigned int b) +{ + register unsigned long long result; + __ASM volatile("smul16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.123.1. SMUL16 ===== */ + +/* ===== Inline Function Start for 3.123.2. SMULX16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY + * \brief SMULX16 (SIMD Signed Crossed 16-bit Multiply) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SMUL16 Rd, Rs1, Rs2 + * SMULX16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do signed 16-bit multiplications and generate two 32-bit results simultaneously. + * + * **RV32 Description**:\n + * For the `SMUL16` instruction, multiply the top 16-bit Q15 content of Rs1 with + * the top 16-bit Q15 content of Rs2. At the same time, multiply the bottom 16-bit Q15 content of Rs1 + * with the bottom 16-bit Q15 content of Rs2. + * For the `SMULX16` instruction, multiply the top 16-bit Q15 content of Rs1 with the bottom 16-bit + * Q15 content of Rs2. At the same time, multiply the bottom 16-bit Q15 content of Rs1 with the top 16- + * bit Q15 content of Rs2. + * The two Q30 results are then written into an even/odd pair of registers specified by Rd(4,1). Rd(4,1), + * i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair includes + * register 2d and 2d+1. + * The odd `2d+1` register of the pair contains the 32-bit result calculated from the top part of Rs1 and + * the even `2d` register of the pair contains the 32-bit result calculated from the bottom part of Rs1. + * + * **RV64 Description**:\n + * For the `SMUL16` instruction, multiply the top 16-bit Q15 content of the lower + * 32-bit word in Rs1 with the top 16-bit Q15 content of the lower 32-bit word in Rs2. At the same time, + * multiply the bottom 16-bit Q15 content of the lower 32-bit word in Rs1 with the bottom 16-bit Q15 + * content of the lower 32-bit word in Rs2. + * For the `SMULX16` instruction, multiply the top 16-bit Q15 content of the lower 32-bit word in Rs1 + * with the bottom 16-bit Q15 content of the lower 32-bit word in Rs2. At the same time, multiply the + * bottom 16-bit Q15 content of the lower 32-bit word in Rs1 with the top 16-bit Q15 content of the + * lower 32-bit word in Rs2. + * The two 32-bit Q30 results are then written into Rd. The result calculated from the top 16-bit of the + * lower 32-bit word in Rs1 is written to Rd.W[1]. And the result calculated from the bottom 16-bit of + * the lower 32-bit word in Rs1 is written to Rd.W[0] + * + * **Operations**:\n + * ~~~ + * * RV32: + * if (is `SMUL16`) { + * op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top + * op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom + * } else if (is `SMULX16`) { + * op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top + * op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom + * } + * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) { + * res = aop s* bop; + * } + * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1); + * R[t_H] = rest; + * R[t_L] = resb; + * * RV64: + * if (is `SMUL16`) { + * op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top + * op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom + * } else if (is `SMULX16`) { + * op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top + * op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom + * } + * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) { + * res = aop s* bop; + * } + * Rd.W[1] = rest; + * Rd.W[0] = resb; + * ~~~ + * + * \param [in] a unsigned int type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long long type + */ +__STATIC_FORCEINLINE unsigned long long __RV_SMULX16(unsigned int a, unsigned int b) +{ + register unsigned long long result; + __ASM volatile("smulx16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.123.2. SMULX16 ===== */ + +/* ===== Inline Function Start for 3.124. SRA.u ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC + * \brief SRA.u (Rounding Shift Right Arithmetic) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * SRA.u Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Perform an arithmetic right shift operation with rounding. The shift amount is a variable + * from a GPR. + * + * **Description**:\n + * This instruction right-shifts the content of Rs1 arithmetically. The shifted out bits are + * filled with the sign-bit and the shift amount is specified by the low-order 5-bits (RV32) or 6-bits + * (RV64) of the Rs2 register. For the rounding operation, a value of 1 is added to the most significant + * discarded bit of the data to calculate the final result. And the result is written to Rd. + * + * **Operations**:\n + * ~~~ + * * RV32: + * sa = Rs2[4:0]; + * if (sa > 0) { + * res[31:-1] = SE33(Rs1[31:(sa-1)]) + 1; + * Rd = res[31:0]; + * } else { + * Rd = Rs1; + * } + * * RV64: + * sa = Rs2[5:0]; + * if (sa > 0) { + * res[63:-1] = SE65(Rs1[63:(sa-1)]) + 1; + * Rd = res[63:0]; + * } else { + * Rd = Rs1; + * } + * ~~~ + * + * \param [in] a long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_SRA_U(long a, unsigned int b) +{ + register long result; + __ASM volatile("sra.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.124. SRA.u ===== */ + +/* ===== Inline Function Start for 3.125. SRAI.u ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC + * \brief SRAI.u (Rounding Shift Right Arithmetic Immediate) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * SRAI.u Rd, Rs1, imm6u[4:0] (RV32) + * SRAI.u Rd, Rs1, imm6u[5:0] (RV64) + * ~~~ + * + * **Purpose**:\n + * Perform an arithmetic right shift operation with rounding. The shift amount is an + * immediate value. + * + * **Description**:\n + * This instruction right-shifts the content of Rs1 arithmetically. The shifted out bits are + * filled with the sign-bit and the shift amount is specified by the imm6u[4:0] (RV32) or imm6u[5:0] + * (RV64) constant . For the rounding operation, a value of 1 is added to the most significant discarded + * bit of the data to calculate the final result. And the result is written to Rd. + * + * **Operations**:\n + * ~~~ + * * RV32: + * sa = imm6u[4:0]; + * if (sa > 0) { + * res[31:-1] = SE33(Rs1[31:(sa-1)]) + 1; + * Rd = res[31:0]; + * } else { + * Rd = Rs1; + * } + * * RV64: + * sa = imm6u[5:0]; + * if (sa > 0) { + * res[63:-1] = SE65(Rs1[63:(sa-1)]) + 1; + * Rd = res[63:0]; + * } else { + * Rd = Rs1; + * } + * ~~~ + * + * \param [in] a long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in long type + */ +#define __RV_SRAI_U(a, b) \ + ({ \ + register long result; \ + register long __a = (long)(a); \ + __ASM volatile("srai.u %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ + result; \ + }) +/* ===== Inline Function End for 3.125. SRAI.u ===== */ + +/* ===== Inline Function Start for 3.126.1. SRA8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT + * \brief SRA8 (SIMD 8-bit Shift Right Arithmetic) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SRA8 Rd, Rs1, Rs2 + * SRA8.u Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 8-bit element arithmetic right shift operations simultaneously. The shift amount is a + * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted + * results. + * + * **Description**:\n + * The 8-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out + * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order + * 3-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is + * added to the most significant discarded bit of each 8-bit data element to calculate the final results. + * And the results are written to Rd. + * + * **Operations**:\n + * ~~~ + * sa = Rs2[2:0]; + * if (sa > 0) { + * if (`.u` form) { // SRA8.u + * res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1; + * Rd.B[x] = res[7:0]; + * } else { // SRA8 + * Rd.B[x] = SE8(Rd.B[x][7:sa]) + * } + * } else { + * Rd = Rs1; + * } + * for RV32: x=3...0, + * for RV64: x=7...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SRA8(unsigned long a, unsigned int b) +{ + register unsigned long result; + __ASM volatile("sra8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.126.1. SRA8 ===== */ + +/* ===== Inline Function Start for 3.126.2. SRA8.u ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT + * \brief SRA8.u (SIMD 8-bit Rounding Shift Right Arithmetic) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SRA8 Rd, Rs1, Rs2 + * SRA8.u Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 8-bit element arithmetic right shift operations simultaneously. The shift amount is a + * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted + * results. + * + * **Description**:\n + * The 8-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out + * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order + * 3-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is + * added to the most significant discarded bit of each 8-bit data element to calculate the final results. + * And the results are written to Rd. + * + * **Operations**:\n + * ~~~ + * sa = Rs2[2:0]; + * if (sa > 0) { + * if (`.u` form) { // SRA8.u + * res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1; + * Rd.B[x] = res[7:0]; + * } else { // SRA8 + * Rd.B[x] = SE8(Rd.B[x][7:sa]) + * } + * } else { + * Rd = Rs1; + * } + * for RV32: x=3...0, + * for RV64: x=7...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SRA8_U(unsigned long a, unsigned int b) +{ + register unsigned long result; + __ASM volatile("sra8.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.126.2. SRA8.u ===== */ + +/* ===== Inline Function Start for 3.127.1. SRAI8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT + * \brief SRAI8 (SIMD 8-bit Shift Right Arithmetic Immediate) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SRAI8 Rd, Rs1, imm3u + * SRAI8.u Rd, Rs1, imm3u + * ~~~ + * + * **Purpose**:\n + * Do 8-bit element arithmetic right shift operations simultaneously. The shift amount is an + * immediate value. The `.u` form performs additional rounding up operations on the shifted results. + * + * **Description**:\n + * The 8-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out + * bits are filled with the sign-bit of the data elements. The shift amount is specified by the imm3u + * constant. For the rounding operation of the `.u` form, a value of 1 is added to the most significant + * discarded bit of each 8-bit data element to calculate the final results. And the results are written to + * Rd. + * + * **Operations**:\n + * ~~~ + * sa = imm3u[2:0]; + * if (sa > 0) { + * if (`.u` form) { // SRA8.u + * res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1; + * Rd.B[x] = res[7:0]; + * } else { // SRA8 + * Rd.B[x] = SE8(Rd.B[x][7:sa]) + * } + * } else { + * Rd = Rs1; + * } + * for RV32: x=3...0, + * for RV64: x=7...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +#define __RV_SRAI8(a, b) \ + ({ \ + register unsigned long result; \ + register unsigned long __a = (unsigned long)(a); \ + __ASM volatile("srai8 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ + result; \ + }) +/* ===== Inline Function End for 3.127.1. SRAI8 ===== */ + +/* ===== Inline Function Start for 3.127.2. SRAI8.u ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT + * \brief SRAI8.u (SIMD 8-bit Rounding Shift Right Arithmetic Immediate) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SRAI8 Rd, Rs1, imm3u + * SRAI8.u Rd, Rs1, imm3u + * ~~~ + * + * **Purpose**:\n + * Do 8-bit element arithmetic right shift operations simultaneously. The shift amount is an + * immediate value. The `.u` form performs additional rounding up operations on the shifted results. + * + * **Description**:\n + * The 8-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out + * bits are filled with the sign-bit of the data elements. The shift amount is specified by the imm3u + * constant. For the rounding operation of the `.u` form, a value of 1 is added to the most significant + * discarded bit of each 8-bit data element to calculate the final results. And the results are written to + * Rd. + * + * **Operations**:\n + * ~~~ + * sa = imm3u[2:0]; + * if (sa > 0) { + * if (`.u` form) { // SRA8.u + * res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1; + * Rd.B[x] = res[7:0]; + * } else { // SRA8 + * Rd.B[x] = SE8(Rd.B[x][7:sa]) + * } + * } else { + * Rd = Rs1; + * } + * for RV32: x=3...0, + * for RV64: x=7...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +#define __RV_SRAI8_U(a, b) \ + ({ \ + register unsigned long result; \ + register unsigned long __a = (unsigned long)(a); \ + __ASM volatile("srai8.u %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ + result; \ + }) +/* ===== Inline Function End for 3.127.2. SRAI8.u ===== */ + +/* ===== Inline Function Start for 3.128.1. SRA16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT + * \brief SRA16 (SIMD 16-bit Shift Right Arithmetic) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SRA16 Rd, Rs1, Rs2 + * SRA16.u Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 16-bit element arithmetic right shift operations simultaneously. The shift amount is a + * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted + * results. + * + * **Description**:\n + * The 16-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out + * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order + * 4-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is + * added to the most significant discarded bit of each 16-bit data element to calculate the final results. + * And the results are written to Rd. + * + * **Operations**:\n + * ~~~ + * sa = Rs2[3:0]; + * if (sa != 0) { + * if (`.u` form) { // SRA16.u + * res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1; + * Rd.H[x] = res[15:0]; + * } else { // SRA16 + * Rd.H[x] = SE16(Rs1.H[x][15:sa]) + * } + * } else { + * Rd = Rs1; + * } + * for RV32: x=1...0, + * for RV64: x=3...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SRA16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("sra16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.128.1. SRA16 ===== */ + +/* ===== Inline Function Start for 3.128.2. SRA16.u ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT + * \brief SRA16.u (SIMD 16-bit Rounding Shift Right Arithmetic) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SRA16 Rd, Rs1, Rs2 + * SRA16.u Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 16-bit element arithmetic right shift operations simultaneously. The shift amount is a + * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted + * results. + * + * **Description**:\n + * The 16-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out + * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order + * 4-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is + * added to the most significant discarded bit of each 16-bit data element to calculate the final results. + * And the results are written to Rd. + * + * **Operations**:\n + * ~~~ + * sa = Rs2[3:0]; + * if (sa != 0) { + * if (`.u` form) { // SRA16.u + * res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1; + * Rd.H[x] = res[15:0]; + * } else { // SRA16 + * Rd.H[x] = SE16(Rs1.H[x][15:sa]) + * } + * } else { + * Rd = Rs1; + * } + * for RV32: x=1...0, + * for RV64: x=3...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SRA16_U(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("sra16.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.128.2. SRA16.u ===== */ + +/* ===== Inline Function Start for 3.129.1. SRAI16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT + * \brief SRAI16 (SIMD 16-bit Shift Right Arithmetic Immediate) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SRAI16 Rd, Rs1, imm4u + * SRAI16.u Rd, Rs1, imm4u + * ~~~ + * + * **Purpose**:\n + * Do 16-bit elements arithmetic right shift operations simultaneously. The shift amount is + * an immediate value. The `.u` form performs additional rounding up operations on the shifted + * results. + * + * **Description**:\n + * The 16-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out + * bits are filled with the sign-bit of the 16-bit data elements. The shift amount is specified by the + * imm4u constant. For the rounding operation of the `.u` form, a value of 1 is added to the most + * significant discarded bit of each 16-bit data to calculate the final results. And the results are written + * to Rd. + * + * **Operations**:\n + * ~~~ + * sa = imm4u[3:0]; + * if (sa > 0) { + * if (`.u` form) { // SRAI16.u + * res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1; + * Rd.H[x] = res[15:0]; + * } else { // SRAI16 + * Rd.H[x] = SE16(Rs1.H[x][15:sa]); + * } + * } else { + * Rd = Rs1; + * } + * for RV32: x=1...0, + * for RV64: x=3...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +#define __RV_SRAI16(a, b) \ + ({ \ + register unsigned long result; \ + register unsigned long __a = (unsigned long)(a); \ + __ASM volatile("srai16 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ + result; \ + }) +/* ===== Inline Function End for 3.129.1. SRAI16 ===== */ + +/* ===== Inline Function Start for 3.129.2. SRAI16.u ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT + * \brief SRAI16.u (SIMD 16-bit Rounding Shift Right Arithmetic Immediate) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SRAI16 Rd, Rs1, imm4u + * SRAI16.u Rd, Rs1, imm4u + * ~~~ + * + * **Purpose**:\n + * Do 16-bit elements arithmetic right shift operations simultaneously. The shift amount is + * an immediate value. The `.u` form performs additional rounding up operations on the shifted + * results. + * + * **Description**:\n + * The 16-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out + * bits are filled with the sign-bit of the 16-bit data elements. The shift amount is specified by the + * imm4u constant. For the rounding operation of the `.u` form, a value of 1 is added to the most + * significant discarded bit of each 16-bit data to calculate the final results. And the results are written + * to Rd. + * + * **Operations**:\n + * ~~~ + * sa = imm4u[3:0]; + * if (sa > 0) { + * if (`.u` form) { // SRAI16.u + * res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1; + * Rd.H[x] = res[15:0]; + * } else { // SRAI16 + * Rd.H[x] = SE16(Rs1.H[x][15:sa]); + * } + * } else { + * Rd = Rs1; + * } + * for RV32: x=1...0, + * for RV64: x=3...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +#define __RV_SRAI16_U(a, b) \ + ({ \ + register unsigned long result; \ + register unsigned long __a = (unsigned long)(a); \ + __ASM volatile("srai16.u %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ + result; \ + }) +/* ===== Inline Function End for 3.129.2. SRAI16.u ===== */ + +/* ===== Inline Function Start for 3.130.1. SRL8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT + * \brief SRL8 (SIMD 8-bit Shift Right Logical) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SRL8 Rt, Ra, Rb + * SRL8.u Rt, Ra, Rb + * ~~~ + * + * **Purpose**:\n + * Do 8-bit elements logical right shift operations simultaneously. The shift amount is a + * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted + * results. + * + * **Description**:\n + * The 8-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits are + * filled with zero. The shift amount is specified by the low-order 3-bits of the value in the Rs2 register. + * For the rounding operation of the `.u` form, a value of 1 is added to the most significant discarded + * bit of each 8-bit data element to calculate the final results. And the results are written to Rd. + * + * **Operations**:\n + * ~~~ + * sa = Rs2[2:0]; + * if (sa > 0) { + * if (`.u` form) { // SRL8.u + * res[8:0] = ZE9(Rs1.B[x][7:sa-1]) + 1; + * Rd.B[x] = res[8:1]; + * } else { // SRL8 + * Rd.B[x] = ZE8(Rs1.B[x][7:sa]); + * } + * } else { + * Rd = Rs1; + * } + * for RV32: x=3...0, + * for RV64: x=7...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SRL8(unsigned long a, unsigned int b) +{ + register unsigned long result; + __ASM volatile("srl8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.130.1. SRL8 ===== */ + +/* ===== Inline Function Start for 3.130.2. SRL8.u ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT + * \brief SRL8.u (SIMD 8-bit Rounding Shift Right Logical) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SRL8 Rt, Ra, Rb + * SRL8.u Rt, Ra, Rb + * ~~~ + * + * **Purpose**:\n + * Do 8-bit elements logical right shift operations simultaneously. The shift amount is a + * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted + * results. + * + * **Description**:\n + * The 8-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits are + * filled with zero. The shift amount is specified by the low-order 3-bits of the value in the Rs2 register. + * For the rounding operation of the `.u` form, a value of 1 is added to the most significant discarded + * bit of each 8-bit data element to calculate the final results. And the results are written to Rd. + * + * **Operations**:\n + * ~~~ + * sa = Rs2[2:0]; + * if (sa > 0) { + * if (`.u` form) { // SRL8.u + * res[8:0] = ZE9(Rs1.B[x][7:sa-1]) + 1; + * Rd.B[x] = res[8:1]; + * } else { // SRL8 + * Rd.B[x] = ZE8(Rs1.B[x][7:sa]); + * } + * } else { + * Rd = Rs1; + * } + * for RV32: x=3...0, + * for RV64: x=7...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SRL8_U(unsigned long a, unsigned int b) +{ + register unsigned long result; + __ASM volatile("srl8.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.130.2. SRL8.u ===== */ + +/* ===== Inline Function Start for 3.131.1. SRLI8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT + * \brief SRLI8 (SIMD 8-bit Shift Right Logical Immediate) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SRLI8 Rt, Ra, imm3u + * SRLI8.u Rt, Ra, imm3u + * ~~~ + * + * **Purpose**:\n + * Do 8-bit elements logical right shift operations simultaneously. The shift amount is an + * immediate value. The `.u` form performs additional rounding up operations on the shifted results. + * + * **Description**:\n + * The 8-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits are + * filled with zero. The shift amount is specified by the imm3u constant. For the rounding operation of + * the `.u` form, a value of 1 is added to the most significant discarded bit of each 8-bit data element to + * calculate the final results. And the results are written to Rd. + * + * **Operations**:\n + * ~~~ + * sa = imm3u[2:0]; + * if (sa > 0) { + * if (`.u` form) { // SRLI8.u + * res[8:0] = ZE9(Rs1.B[x][7:sa-1]) + 1; + * Rd.B[x] = res[8:1]; + * } else { // SRLI8 + * Rd.B[x] = ZE8(Rs1.B[x][7:sa]); + * } + * } else { + * Rd = Rs1; + * } + * for RV32: x=3...0, + * for RV64: x=7...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +#define __RV_SRLI8(a, b) \ + ({ \ + register unsigned long result; \ + register unsigned long __a = (unsigned long)(a); \ + __ASM volatile("srli8 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ + result; \ + }) +/* ===== Inline Function End for 3.131.1. SRLI8 ===== */ + +/* ===== Inline Function Start for 3.131.2. SRLI8.u ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT + * \brief SRLI8.u (SIMD 8-bit Rounding Shift Right Logical Immediate) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SRLI8 Rt, Ra, imm3u + * SRLI8.u Rt, Ra, imm3u + * ~~~ + * + * **Purpose**:\n + * Do 8-bit elements logical right shift operations simultaneously. The shift amount is an + * immediate value. The `.u` form performs additional rounding up operations on the shifted results. + * + * **Description**:\n + * The 8-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits are + * filled with zero. The shift amount is specified by the imm3u constant. For the rounding operation of + * the `.u` form, a value of 1 is added to the most significant discarded bit of each 8-bit data element to + * calculate the final results. And the results are written to Rd. + * + * **Operations**:\n + * ~~~ + * sa = imm3u[2:0]; + * if (sa > 0) { + * if (`.u` form) { // SRLI8.u + * res[8:0] = ZE9(Rs1.B[x][7:sa-1]) + 1; + * Rd.B[x] = res[8:1]; + * } else { // SRLI8 + * Rd.B[x] = ZE8(Rs1.B[x][7:sa]); + * } + * } else { + * Rd = Rs1; + * } + * for RV32: x=3...0, + * for RV64: x=7...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +#define __RV_SRLI8_U(a, b) \ + ({ \ + register unsigned long result; \ + register unsigned long __a = (unsigned long)(a); \ + __ASM volatile("srli8.u %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ + result; \ + }) +/* ===== Inline Function End for 3.131.2. SRLI8.u ===== */ + +/* ===== Inline Function Start for 3.132.1. SRL16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT + * \brief SRL16 (SIMD 16-bit Shift Right Logical) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SRL16 Rt, Ra, Rb + * SRL16.u Rt, Ra, Rb + * ~~~ + * + * **Purpose**:\n + * Do 16-bit elements logical right shift operations simultaneously. The shift amount is a variable from a GPR. The `.u` form performs additional rounding upoperations on the shifted results. + * + * **Description**:\n + * The 16-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits + * are filled with zero. The shift amount is specified by the low-order 4-bits of the value in the Rs2 + * register. For the rounding operation of the `.u` form, a value of 1 is added to the most significant + * discarded bit of each 16-bit data element to calculate the final results. And the results are written to + * Rd. + * + * **Operations**:\n + * ~~~ + * sa = Rs2[3:0]; + * if (sa > 0) { + * if (`.u` form) { // SRL16.u + * res[16:0] = ZE17(Rs1.H[x][15:sa-1]) + 1; + * Rd.H[x] = res[16:1]; + * } else { // SRL16 + * Rd.H[x] = ZE16(Rs1.H[x][15:sa]); + * } + * } else { + * Rd = Rs1; + * } + * for RV32: x=1...0, + * for RV64: x=3...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SRL16(unsigned long a, unsigned int b) +{ + register unsigned long result; + __ASM volatile("srl16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.132.1. SRL16 ===== */ + +/* ===== Inline Function Start for 3.132.2. SRL16.u ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT + * \brief SRL16.u (SIMD 16-bit Rounding Shift Right Logical) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SRL16 Rt, Ra, Rb + * SRL16.u Rt, Ra, Rb + * ~~~ + * + * **Purpose**:\n + * Do 16-bit elements logical right shift operations simultaneously. The shift amount is a variable from a GPR. The `.u` form performs additional rounding upoperations on the shifted results. + * + * **Description**:\n + * The 16-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits + * are filled with zero. The shift amount is specified by the low-order 4-bits of the value in the Rs2 + * register. For the rounding operation of the `.u` form, a value of 1 is added to the most significant + * discarded bit of each 16-bit data element to calculate the final results. And the results are written to + * Rd. + * + * **Operations**:\n + * ~~~ + * sa = Rs2[3:0]; + * if (sa > 0) { + * if (`.u` form) { // SRL16.u + * res[16:0] = ZE17(Rs1.H[x][15:sa-1]) + 1; + * Rd.H[x] = res[16:1]; + * } else { // SRL16 + * Rd.H[x] = ZE16(Rs1.H[x][15:sa]); + * } + * } else { + * Rd = Rs1; + * } + * for RV32: x=1...0, + * for RV64: x=3...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SRL16_U(unsigned long a, unsigned int b) +{ + register unsigned long result; + __ASM volatile("srl16.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.132.2. SRL16.u ===== */ + +/* ===== Inline Function Start for 3.133.1. SRLI16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT + * \brief SRLI16 (SIMD 16-bit Shift Right Logical Immediate) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SRLI16 Rt, Ra, imm4u + * SRLI16.u Rt, Ra, imm4u + * ~~~ + * + * **Purpose**:\n + * Do 16-bit elements logical right shift operations simultaneously. The shift amount is an + * immediate value. The `.u` form performs additional rounding up operations on the shifted results. + * + * **Description**:\n + * The 16-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits + * are filled with zero. The shift amount is specified by the imm4u constant. For the rounding + * operation of the `.u` form, a value of 1 is added to the most significant discarded bit of each 16-bit + * data element to calculate the final results. And the results are written to Rd. + * + * **Operations**:\n + * ~~~ + * sa = imm4u; + * if (sa > 0) { + * if (`.u` form) { // SRLI16.u + * res[16:0] = ZE17(Rs1.H[x][15:sa-1]) + 1; + * Rd.H[x] = res[16:1]; + * } else { // SRLI16 + * Rd.H[x] = ZE16(Rs1.H[x][15:sa]); + * } + * } else { + * Rd = Rs1; + * } + * for RV32: x=1...0, + * for RV64: x=3...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +#define __RV_SRLI16(a, b) \ + ({ \ + register unsigned long result; \ + register unsigned long __a = (unsigned long)(a); \ + __ASM volatile("srli16 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ + result; \ + }) +/* ===== Inline Function End for 3.133.1. SRLI16 ===== */ + +/* ===== Inline Function Start for 3.133.2. SRLI16.u ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT + * \brief SRLI16.u (SIMD 16-bit Rounding Shift Right Logical Immediate) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SRLI16 Rt, Ra, imm4u + * SRLI16.u Rt, Ra, imm4u + * ~~~ + * + * **Purpose**:\n + * Do 16-bit elements logical right shift operations simultaneously. The shift amount is an + * immediate value. The `.u` form performs additional rounding up operations on the shifted results. + * + * **Description**:\n + * The 16-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits + * are filled with zero. The shift amount is specified by the imm4u constant. For the rounding + * operation of the `.u` form, a value of 1 is added to the most significant discarded bit of each 16-bit + * data element to calculate the final results. And the results are written to Rd. + * + * **Operations**:\n + * ~~~ + * sa = imm4u; + * if (sa > 0) { + * if (`.u` form) { // SRLI16.u + * res[16:0] = ZE17(Rs1.H[x][15:sa-1]) + 1; + * Rd.H[x] = res[16:1]; + * } else { // SRLI16 + * Rd.H[x] = ZE16(Rs1.H[x][15:sa]); + * } + * } else { + * Rd = Rs1; + * } + * for RV32: x=1...0, + * for RV64: x=3...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +#define __RV_SRLI16_U(a, b) \ + ({ \ + register unsigned long result; \ + register unsigned long __a = (unsigned long)(a); \ + __ASM volatile("srli16.u %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ + result; \ + }) +/* ===== Inline Function End for 3.133.2. SRLI16.u ===== */ + +/* ===== Inline Function Start for 3.134. STAS16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB + * \brief STAS16 (SIMD 16-bit Straight Addition & Subtraction) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * STAS16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 16-bit integer element addition and 16-bit integer element subtraction in a 32-bit + * chunk simultaneously. Operands are from corresponding positions in 32-bit chunks. + * + * **Description**:\n + * This instruction adds the 16-bit integer element in [31:16] of 32-bit chunks in Rs1 with + * the 16-bit integer element in [31:16] of 32-bit chunks in Rs2, and writes the result to [31:16] of 32-bit + * chunks in Rd; at the same time, it subtracts the 16-bit integer element in [15:0] of 32-bit chunks in + * Rs2 from the 16-bit integer element in [15:0] of 32-bit chunks, and writes the result to [15:0] of 32- + * bit chunks in Rd. + * + * **Note**:\n + * This instruction can be used for either signed or unsigned operations. + * + * **Operations**:\n + * ~~~ + * Rd.W[x][31:16] = Rs1.W[x][31:16] + Rs2.W[x][31:16]; + * Rd.W[x][15:0] = Rs1.W[x][15:0] - Rs2.W[x][15:0]; + * for RV32, x=0 + * for RV64, x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_STAS16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("stas16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.134. STAS16 ===== */ + +/* ===== Inline Function Start for 3.135. STSA16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB + * \brief STSA16 (SIMD 16-bit Straight Subtraction & Addition) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * STSA16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 16-bit integer element subtraction and 16-bit integer element addition in a 32-bit + * chunk simultaneously. Operands are from corresponding positions in 32-bit chunks. + * + * **Description**:\n + * This instruction subtracts the 16-bit integer element in [31:16] of 32-bit chunks in Rs2 + * from the 16-bit integer element in [31:16] of 32-bit chunks in Rs1, and writes the result to [31:16] of + * 32-bit chunks in Rd; at the same time, it adds the 16-bit integer element in [15:0] of 32-bit chunks in + * Rs2 with the 16-bit integer element in [15:0] of 32-bit chunks in Rs1, and writes the result to [15:0] of + * 32-bit chunks in Rd. + * + * **Note**:\n + * This instruction can be used for either signed or unsigned operations. + * + * **Operations**:\n + * ~~~ + * Rd.W[x][31:16] = Rs1.W[x][31:16] - Rs2.W[x][31:16]; + * Rd.W[x][15:0] = Rs1.W[x][15:0] + Rs2.W[x][15:0]; + * for RV32, x=0 + * for RV64, x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_STSA16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("stsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.135. STSA16 ===== */ + +/* ===== Inline Function Start for 3.136. SUB8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB + * \brief SUB8 (SIMD 8-bit Subtraction) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SUB8 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 8-bit integer element subtractions simultaneously. + * + * **Description**:\n + * This instruction subtracts the 8-bit integer elements in Rs2 from the 8-bit integer + * elements in Rs1, and then writes the result to Rd. + * + * **Note**:\n + * This instruction can be used for either signed or unsigned subtraction. + * + * **Operations**:\n + * ~~~ + * Rd.B[x] = Rs1.B[x] - Rs2.B[x]; + * for RV32: x=3...0, + * for RV64: x=7...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SUB8(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("sub8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.136. SUB8 ===== */ + +/* ===== Inline Function Start for 3.137. SUB16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB + * \brief SUB16 (SIMD 16-bit Subtraction) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * SUB16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 16-bit integer element subtractions simultaneously. + * + * **Description**:\n + * This instruction subtracts the 16-bit integer elements in Rs2 from the 16-bit integer + * elements in Rs1, and then writes the result to Rd. + * + * **Note**:\n + * This instruction can be used for either signed or unsigned subtraction. + * + * **Operations**:\n + * ~~~ + * Rd.H[x] = Rs1.H[x] - Rs2.H[x]; + * for RV32: x=1...0, + * for RV64: x=3...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SUB16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("sub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.137. SUB16 ===== */ + +/* ===== Inline Function Start for 3.138. SUB64 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB + * \brief SUB64 (64-bit Subtraction) + * \details + * **Type**: DSP (64-bit Profile) + * + * **Syntax**:\n + * ~~~ + * SUB64 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Perform a 64-bit signed or unsigned integer subtraction. + * + * **RV32 Description**:\n + * This instruction subtracts the 64-bit integer of an even/odd pair of registers + * specified by Rs2(4,1) from the 64-bit integer of an even/odd pair of registers specified by Rs1(4,1), + * and then writes the 64-bit result to an even/odd pair of registers specified by Rd(4,1). + * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair + * includes register 2d and 2d+1. + * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d` + * register of the pair contains the low 32-bit of the operand. + * + * **RV64 Description**:\n + * This instruction subtracts the 64-bit integer of Rs2 from the 64-bit integer of Rs1, + * and then writes the 64-bit result to Rd. + * + * **Note**:\n + * This instruction can be used for either signed or unsigned subtraction. + * + * **Operations**:\n + * ~~~ + * * RV32: + * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1); + * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1); + * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1); + * R[t_H].R[t_L] = R[a_H].R[a_L] - R[b_H].R[b_L]; + * * RV64: + * Rd = Rs1 - Rs2; + * ~~~ + * + * \param [in] a unsigned long long type of value stored in a + * \param [in] b unsigned long long type of value stored in b + * \return value stored in unsigned long long type + */ +__STATIC_FORCEINLINE unsigned long long __RV_SUB64(unsigned long long a, unsigned long long b) +{ + register unsigned long long result; + __ASM volatile("sub64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.138. SUB64 ===== */ + +/* ===== Inline Function Start for 3.139.1. SUNPKD810 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK + * \brief SUNPKD810 (Signed Unpacking Bytes 1 & 0) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * SUNPKD8xy Rd, Rs1 + * xy = {10, 20, 30, 31, 32} + * ~~~ + * + * **Purpose**:\n + * Unpack byte *x and byte y* of 32-bit chunks in a register into two 16-bit signed halfwords + * of 32-bit chunks in a register. + * + * **Description**:\n + * For the `SUNPKD8(x)(*y*)` instruction, it unpacks byte *x and byte y* of 32-bit chunks in Rs1 into + * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit + * chunks in Rd. + * + * **Operations**:\n + * ~~~ + * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x]) + * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y]) + * // SUNPKD810, x=1,y=0 + * // SUNPKD820, x=2,y=0 + * // SUNPKD830, x=3,y=0 + * // SUNPKD831, x=3,y=1 + * // SUNPKD832, x=3,y=2 + * for RV32: m=0, + * for RV64: m=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SUNPKD810(unsigned long a) +{ + register unsigned long result; + __ASM volatile("sunpkd810 %0, %1" : "=r"(result) : "r"(a)); + return result; +} +/* ===== Inline Function End for 3.139.1. SUNPKD810 ===== */ + +/* ===== Inline Function Start for 3.139.2. SUNPKD820 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK + * \brief SUNPKD820 (Signed Unpacking Bytes 2 & 0) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * SUNPKD8xy Rd, Rs1 + * xy = {10, 20, 30, 31, 32} + * ~~~ + * + * **Purpose**:\n + * Unpack byte *x and byte y* of 32-bit chunks in a register into two 16-bit signed halfwords + * of 32-bit chunks in a register. + * + * **Description**:\n + * For the `SUNPKD8(x)(*y*)` instruction, it unpacks byte *x and byte y* of 32-bit chunks in Rs1 into + * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit + * chunks in Rd. + * + * **Operations**:\n + * ~~~ + * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x]) + * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y]) + * // SUNPKD810, x=1,y=0 + * // SUNPKD820, x=2,y=0 + * // SUNPKD830, x=3,y=0 + * // SUNPKD831, x=3,y=1 + * // SUNPKD832, x=3,y=2 + * for RV32: m=0, + * for RV64: m=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SUNPKD820(unsigned long a) +{ + register unsigned long result; + __ASM volatile("sunpkd820 %0, %1" : "=r"(result) : "r"(a)); + return result; +} +/* ===== Inline Function End for 3.139.2. SUNPKD820 ===== */ + +/* ===== Inline Function Start for 3.139.3. SUNPKD830 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK + * \brief SUNPKD830 (Signed Unpacking Bytes 3 & 0) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * SUNPKD8xy Rd, Rs1 + * xy = {10, 20, 30, 31, 32} + * ~~~ + * + * **Purpose**:\n + * Unpack byte *x and byte y* of 32-bit chunks in a register into two 16-bit signed halfwords + * of 32-bit chunks in a register. + * + * **Description**:\n + * For the `SUNPKD8(x)(*y*)` instruction, it unpacks byte *x and byte y* of 32-bit chunks in Rs1 into + * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit + * chunks in Rd. + * + * **Operations**:\n + * ~~~ + * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x]) + * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y]) + * // SUNPKD810, x=1,y=0 + * // SUNPKD820, x=2,y=0 + * // SUNPKD830, x=3,y=0 + * // SUNPKD831, x=3,y=1 + * // SUNPKD832, x=3,y=2 + * for RV32: m=0, + * for RV64: m=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SUNPKD830(unsigned long a) +{ + register unsigned long result; + __ASM volatile("sunpkd830 %0, %1" : "=r"(result) : "r"(a)); + return result; +} +/* ===== Inline Function End for 3.139.3. SUNPKD830 ===== */ + +/* ===== Inline Function Start for 3.139.4. SUNPKD831 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK + * \brief SUNPKD831 (Signed Unpacking Bytes 3 & 1) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * SUNPKD8xy Rd, Rs1 + * xy = {10, 20, 30, 31, 32} + * ~~~ + * + * **Purpose**:\n + * Unpack byte *x and byte y* of 32-bit chunks in a register into two 16-bit signed halfwords + * of 32-bit chunks in a register. + * + * **Description**:\n + * For the `SUNPKD8(x)(*y*)` instruction, it unpacks byte *x and byte y* of 32-bit chunks in Rs1 into + * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit + * chunks in Rd. + * + * **Operations**:\n + * ~~~ + * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x]) + * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y]) + * // SUNPKD810, x=1,y=0 + * // SUNPKD820, x=2,y=0 + * // SUNPKD830, x=3,y=0 + * // SUNPKD831, x=3,y=1 + * // SUNPKD832, x=3,y=2 + * for RV32: m=0, + * for RV64: m=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SUNPKD831(unsigned long a) +{ + register unsigned long result; + __ASM volatile("sunpkd831 %0, %1" : "=r"(result) : "r"(a)); + return result; +} +/* ===== Inline Function End for 3.139.4. SUNPKD831 ===== */ + +/* ===== Inline Function Start for 3.139.5. SUNPKD832 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK + * \brief SUNPKD832 (Signed Unpacking Bytes 3 & 2) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * SUNPKD8xy Rd, Rs1 + * xy = {10, 20, 30, 31, 32} + * ~~~ + * + * **Purpose**:\n + * Unpack byte *x and byte y* of 32-bit chunks in a register into two 16-bit signed halfwords + * of 32-bit chunks in a register. + * + * **Description**:\n + * For the `SUNPKD8(x)(*y*)` instruction, it unpacks byte *x and byte y* of 32-bit chunks in Rs1 into + * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit + * chunks in Rd. + * + * **Operations**:\n + * ~~~ + * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x]) + * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y]) + * // SUNPKD810, x=1,y=0 + * // SUNPKD820, x=2,y=0 + * // SUNPKD830, x=3,y=0 + * // SUNPKD831, x=3,y=1 + * // SUNPKD832, x=3,y=2 + * for RV32: m=0, + * for RV64: m=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SUNPKD832(unsigned long a) +{ + register unsigned long result; + __ASM volatile("sunpkd832 %0, %1" : "=r"(result) : "r"(a)); + return result; +} +/* ===== Inline Function End for 3.139.5. SUNPKD832 ===== */ + +/* ===== Inline Function Start for 3.140. SWAP8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC + * \brief SWAP8 (Swap Byte within Halfword) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * SWAP8 Rd, Rs1 + * ~~~ + * + * **Purpose**:\n + * Swap the bytes within each halfword of a register. + * + * **Description**:\n + * This instruction swaps the bytes within each halfword of Rs1 and writes the result to + * Rd. + * + * **Operations**:\n + * ~~~ + * Rd.H[x] = CONCAT(Rs1.H[x][7:0],Rs1.H[x][15:8]); + * for RV32: x=1...0, + * for RV64: x=3...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SWAP8(unsigned long a) +{ + register unsigned long result; + __ASM volatile("swap8 %0, %1" : "=r"(result) : "r"(a)); + return result; +} +/* ===== Inline Function End for 3.140. SWAP8 ===== */ + +/* ===== Inline Function Start for 3.141. SWAP16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC + * \brief SWAP16 (Swap Halfword within Word) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * SWAP16 Rd, Rs1 + * ~~~ + * + * **Purpose**:\n + * Swap the 16-bit halfwords within each word of a register. + * + * **Description**:\n + * This instruction swaps the 16-bit halfwords within each word of Rs1 and writes the + * result to Rd. + * + * **Operations**:\n + * ~~~ + * Rd.W[x] = CONCAT(Rs1.W[x][15:0],Rs1.H[x][31:16]); + * for RV32: x=0, + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SWAP16(unsigned long a) +{ + register unsigned long result; + __ASM volatile("swap16 %0, %1" : "=r"(result) : "r"(a)); + return result; +} +/* ===== Inline Function End for 3.141. SWAP16 ===== */ + +/* ===== Inline Function Start for 3.142. UCLIP8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC + * \brief UCLIP8 (SIMD 8-bit Unsigned Clip Value) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * UCLIP8 Rt, Ra, imm3u + * ~~~ + * + * **Purpose**:\n + * Limit the 8-bit signed elements of a register into an unsigned range simultaneously. + * + * **Description**:\n + * This instruction limits the 8-bit signed elements stored in Rs1 into an unsigned integer + * range between 2^imm3u-1 and 0, and writes the limited results to Rd. For example, if imm3u is 3, the 8- + * bit input values should be saturated between 7 and 0. If saturation is performed, set OV bit to 1. + * + * **Operations**:\n + * ~~~ + * src = Rs1.H[x]; + * if (src > (2^imm3u)-1) { + * src = (2^imm3u)-1; + * OV = 1; + * } else if (src < 0) { + * src = 0; + * OV = 1; + * } + * Rd.H[x] = src; + * for RV32: x=3...0, + * for RV64: x=7...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +#define __RV_UCLIP8(a, b) \ + ({ \ + register unsigned long result; \ + register unsigned long __a = (unsigned long)(a); \ + __ASM volatile("uclip8 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ + result; \ + }) +/* ===== Inline Function End for 3.142. UCLIP8 ===== */ + +/* ===== Inline Function Start for 3.143. UCLIP16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC + * \brief UCLIP16 (SIMD 16-bit Unsigned Clip Value) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * UCLIP16 Rt, Ra, imm4u + * ~~~ + * + * **Purpose**:\n + * Limit the 16-bit signed elements of a register into an unsigned range simultaneously. + * + * **Description**:\n + * This instruction limits the 16-bit signed elements stored in Rs1 into an unsigned + * integer range between 2imm4u-1 and 0, and writes the limited results to Rd. For example, if imm4u is + * 3, the 16-bit input values should be saturated between 7 and 0. If saturation is performed, set OV bit + * to 1. + * + * **Operations**:\n + * ~~~ + * src = Rs1.H[x]; + * if (src > (2^imm4u)-1) { + * src = (2^imm4u)-1; + * OV = 1; + * } else if (src < 0) { + * src = 0; + * OV = 1; + * } + * Rd.H[x] = src; + * for RV32: x=1...0, + * for RV64: x=3...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +#define __RV_UCLIP16(a, b) \ + ({ \ + register unsigned long result; \ + register unsigned long __a = (unsigned long)(a); \ + __ASM volatile("uclip16 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ + result; \ + }) +/* ===== Inline Function End for 3.143. UCLIP16 ===== */ + +/* ===== Inline Function Start for 3.144. UCLIP32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC + * \brief UCLIP32 (SIMD 32-bit Unsigned Clip Value) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * UCLIP32 Rd, Rs1, imm5u[4:0] + * ~~~ + * + * **Purpose**:\n + * Limit the 32-bit signed integer elements of a register into an unsigned range + * simultaneously. + * + * **Description**:\n + * This instruction limits the 32-bit signed integer elements stored in Rs1 into an + * unsigned integer range between 2imm5u-1 and 0, and writes the limited results to Rd. For example, if + * imm5u is 3, the 32-bit input values should be saturated between 7 and 0. If saturation is performed, + * set OV bit to 1. + * + * **Operations**:\n + * ~~~ + * src = Rs1.W[x]; + * if (src > (2^imm5u)-1) { + * src = (2^imm5u)-1; + * OV = 1; + * } else if (src < 0) { + * src = 0; + * OV = 1; + * } + * Rd.W[x] = src + * for RV32: x=0, + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +#define __RV_UCLIP32(a, b) \ + ({ \ + register unsigned long result; \ + register unsigned long __a = (unsigned long)(a); \ + __ASM volatile("uclip32 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ + result; \ + }) +/* ===== Inline Function End for 3.144. UCLIP32 ===== */ + +/* ===== Inline Function Start for 3.145. UCMPLE8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP + * \brief UCMPLE8 (SIMD 8-bit Unsigned Compare Less Than & Equal) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * UCMPLE8 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 8-bit unsigned integer elements less than & equal comparisons simultaneously. + * + * **Description**:\n + * This instruction compares the 8-bit unsigned integer elements in Rs1 with the 8-bit + * unsigned integer elements in Rs2 to see if the one in Rs1 is less than or equal to the one in Rs2. If it + * is true, the result is 0xFF; otherwise, the result is 0x0. The four comparison results are written to + * Rd. + * + * **Operations**:\n + * ~~~ + * Rd.B[x] = (Rs1.B[x] <=u Rs2.B[x])? 0xff : 0x0; + * for RV32: x=3...0, + * for RV64: x=7...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_UCMPLE8(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("ucmple8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.145. UCMPLE8 ===== */ + +/* ===== Inline Function Start for 3.146. UCMPLE16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP + * \brief UCMPLE16 (SIMD 16-bit Unsigned Compare Less Than & Equal) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * UCMPLE16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 16-bit unsigned integer elements less than & equal comparisons simultaneously. + * + * **Description**:\n + * This instruction compares the 16-bit unsigned integer elements in Rs1 with the 16-bit + * unsigned integer elements in Rs2 to see if the one in Rs1 is less than or equal to the one in Rs2. If it + * is true, the result is 0xFFFF; otherwise, the result is 0x0. The element comparison results are + * written to Rd. + * + * **Operations**:\n + * ~~~ + * Rd.H[x] = (Rs1.H[x] <=u Rs2.H[x])? 0xffff : 0x0; + * for RV32: x=1...0, + * for RV64: x=3...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_UCMPLE16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("ucmple16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.146. UCMPLE16 ===== */ + +/* ===== Inline Function Start for 3.147. UCMPLT8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP + * \brief UCMPLT8 (SIMD 8-bit Unsigned Compare Less Than) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * UCMPLT8 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 8-bit unsigned integer elements less than comparisons simultaneously. + * + * **Description**:\n + * This instruction compares the 8-bit unsigned integer elements in Rs1 with the 8-bit + * unsigned integer elements in Rs2 to see if the one in Rs1 is less than the one in Rs2. If it is true, the + * result is 0xFF; otherwise, the result is 0x0. The element comparison results are written to Rd. + * + * **Operations**:\n + * ~~~ + * Rd.B[x] = (Rs1.B[x] (2^8)-1) { + * res[x] = (2^8)-1; + * OV = 1; + * } + * Rd.B[x] = res[x]; + * for RV32: x=3...0, + * for RV64: x=7...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_UKADD8(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("ukadd8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.149. UKADD8 ===== */ + +/* ===== Inline Function Start for 3.150. UKADD16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB + * \brief UKADD16 (SIMD 16-bit Unsigned Saturating Addition) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * UKADD16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 16-bit unsigned integer element saturating additions simultaneously. + * + * **Description**:\n + * This instruction adds the 16-bit unsigned integer elements in Rs1 with the 16-bit + * unsigned integer elements in Rs2. If any of the results are beyond the 16-bit unsigned number + * range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set to 1. The saturated + * results are written to Rd. + * + * **Operations**:\n + * ~~~ + * res[x] = Rs1.H[x] + Rs2.H[x]; + * if (res[x] > (2^16)-1) { + * res[x] = (2^16)-1; + * OV = 1; + * } + * Rd.H[x] = res[x]; + * for RV32: x=1...0, + * for RV64: x=3...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_UKADD16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("ukadd16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.150. UKADD16 ===== */ + +/* ===== Inline Function Start for 3.151. UKADD64 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB + * \brief UKADD64 (64-bit Unsigned Saturating Addition) + * \details + * **Type**: DSP (64-bit Profile) + * + * **Syntax**:\n + * ~~~ + * UKADD64 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Add two 64-bit unsigned integers. The result is saturated to the U64 range. + * + * **RV32 Description**:\n + * This instruction adds the 64-bit unsigned integer of an even/odd pair of registers + * specified by Rs1(4,1) with the 64-bit unsigned integer of an even/odd pair of registers specified by + * Rs2(4,1). If the 64-bit result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is saturated to the + * range and the OV bit is set to 1. The saturated result is written to an even/odd pair of registers + * specified by Rd(4,1). + * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair + * includes register 2d and 2d+1. + * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register + * of the pair contains the low 32-bit of the result. + * + * **RV64 Description**:\n + * This instruction adds the 64-bit unsigned integer in Rs1 with the 64-bit unsigned + * integer in Rs2. If the 64-bit result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is saturated to + * the range and the OV bit is set to 1. The saturated result is written to Rd. + * + * **Operations**:\n + * ~~~ + * * RV32: + * t_L = CONCAT(Rt(4,1),1'b0); t_H = CONCAT(Rt(4,1),1'b1); + * a_L = CONCAT(Ra(4,1),1'b0); a_H = CONCAT(Ra(4,1),1'b1); + * b_L = CONCAT(Rb(4,1),1'b0); b_H = CONCAT(Rb(4,1),1'b1); + * result = R[a_H].R[a_L] + R[b_H].R[b_L]; + * if (result > (2^64)-1) { + * result = (2^64)-1; OV = 1; + * } + * R[t_H].R[t_L] = result; + * * RV64: + * result = Rs1 + Rs2; + * if (result > (2^64)-1) { + * result = (2^64)-1; OV = 1; + * } + * Rd = result; + * ~~~ + * + * \param [in] a unsigned long long type of value stored in a + * \param [in] b unsigned long long type of value stored in b + * \return value stored in unsigned long long type + */ +__STATIC_FORCEINLINE unsigned long long __RV_UKADD64(unsigned long long a, unsigned long long b) +{ + register unsigned long long result; + __ASM volatile("ukadd64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.151. UKADD64 ===== */ + +/* ===== Inline Function Start for 3.152. UKADDH ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU + * \brief UKADDH (Unsigned Addition with U16 Saturation) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * UKADDH Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Add the unsigned lower 32-bit content of two registers with U16 saturation. + * + * **Description**:\n + * The unsigned lower 32-bit content of Rs1 is added with the unsigned lower 32-bit + * content of Rs2. And the result is saturated to the 16-bit unsigned integer range of [0, 2^16-1] and then + * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag. + * + * **Operations**:\n + * ~~~ + * tmp = Rs1.W[0] + Rs2.W[0]; + * if (tmp > (2^16)-1) { + * tmp = (2^16)-1; + * OV = 1; + * } + * Rd = SE(tmp[15:0]); + * ~~~ + * + * \param [in] a unsigned int type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_UKADDH(unsigned int a, unsigned int b) +{ + register unsigned long result; + __ASM volatile("ukaddh %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.152. UKADDH ===== */ + +/* ===== Inline Function Start for 3.153. UKADDW ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU + * \brief UKADDW (Unsigned Addition with U32 Saturation) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * UKADDW Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Add the unsigned lower 32-bit content of two registers with U32 saturation. + * + * **Description**:\n + * The unsigned lower 32-bit content of Rs1 is added with the unsigned lower 32-bit + * content of Rs2. And the result is saturated to the 32-bit unsigned integer range of [0, 2^32-1] and then + * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag. + * + * **Operations**:\n + * ~~~ + * tmp = Rs1.W[0] + Rs2.W[0]; + * if (tmp > (2^32)-1) { + * tmp[31:0] = (2^32)-1; + * OV = 1; + * } + * Rd = tmp[31:0]; // RV32 + * Rd = SE(tmp[31:0]); // RV64 + * ~~~ + * + * \param [in] a unsigned int type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_UKADDW(unsigned int a, unsigned int b) +{ + register unsigned long result; + __ASM volatile("ukaddw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.153. UKADDW ===== */ + +/* ===== Inline Function Start for 3.154. UKCRAS16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB + * \brief UKCRAS16 (SIMD 16-bit Unsigned Saturating Cross Addition & Subtraction) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * UKCRAS16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do one 16-bit unsigned integer element saturating addition and one 16-bit unsigned + * integer element saturating subtraction in a 32-bit chunk simultaneously. Operands are from crossed + * positions in 32-bit chunks. + * + * **Description**:\n + * This instruction adds the 16-bit unsigned integer element in [31:16] of 32-bit chunks in + * Rs1 with the 16-bit unsigned integer element in [15:0] of 32-bit chunks in Rs2; at the same time, it + * subtracts the 16-bit unsigned integer element in [31:16] of 32-bit chunks in Rs2 from the 16-bit + * unsigned integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the 16-bit + * unsigned number range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set to 1. + * The saturated results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit + * chunks in Rd for subtraction. + * + * **Operations**:\n + * ~~~ + * res1 = Rs1.W[x][31:16] + Rs2.W[x][15:0]; + * res2 = Rs1.W[x][15:0] - Rs2.W[x][31:16]; + * if (res1 > (2^16)-1) { + * res1 = (2^16)-1; + * OV = 1; + * } + * if (res2 < 0) { + * res2 = 0; + * OV = 1; + * } + * Rd.W[x][31:16] = res1; + * Rd.W[x][15:0] = res2; + * for RV32, x=0 + * for RV64, x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_UKCRAS16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("ukcras16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.154. UKCRAS16 ===== */ + +/* ===== Inline Function Start for 3.155. UKCRSA16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB + * \brief UKCRSA16 (SIMD 16-bit Unsigned Saturating Cross Subtraction & Addition) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * UKCRSA16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do one 16-bit unsigned integer element saturating subtraction and one 16-bit unsigned + * integer element saturating addition in a 32-bit chunk simultaneously. Operands are from crossed + * positions in 32-bit chunks. + * + * **Description**:\n + * This instruction subtracts the 16-bit unsigned integer element in [15:0] of 32-bit + * chunks in Rs2 from the 16-bit unsigned integer element in [31:16] of 32-bit chunks in Rs1; at the + * same time, it adds the 16-bit unsigned integer element in [31:16] of 32-bit chunks in Rs2 with the 16- + * bit unsigned integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the + * 16-bit unsigned number range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set + * to 1. The saturated results are written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of + * 32-bit chunks in Rd for addition. + * + * **Operations**:\n + * ~~~ + * res1 = Rs1.W[x][31:16] - Rs2.W[x][15:0]; + * res2 = Rs1.W[x][15:0] + Rs2.W[x][31:16]; + * if (res1 < 0) { + * res1 = 0; + * OV = 1; + * } else if (res2 > (2^16)-1) { + * res2 = (2^16)-1; + * OV = 1; + * } + * Rd.W[x][31:16] = res1; + * Rd.W[x][15:0] = res2; + * for RV32, x=0 + * for RV64, x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_UKCRSA16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("ukcrsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.155. UKCRSA16 ===== */ + +/* ===== Inline Function Start for 3.156. UKMAR64 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB + * \brief UKMAR64 (Unsigned Multiply and Saturating Add to 64-Bit Data) + * \details + * **Type**: DSP (64-bit Profile) + * + * **Syntax**:\n + * ~~~ + * UKMAR64 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the 32-bit unsigned elements in two registers and add the 64-bit multiplication + * results to the 64-bit unsigned data of a pair of registers (RV32) or a register (RV64). The result is + * saturated to the U64 range and written back to the pair of registers (RV32) or the register (RV64). + * + * **RV32 Description**:\n + * This instruction multiplies the 32-bit unsigned data of Rs1 with that of Rs2. It + * adds the 64-bit multiplication result to the 64-bit unsigned data of an even/odd pair of registers + * specified by Rd(4,1) with unlimited precision. If the 64-bit addition result is beyond the U64 number + * range (0 <= U64 <= 2^64-1), it is saturated to the range and the OV bit is set to 1. The saturated result is + * written back to the even/odd pair of registers specified by Rd(4,1). + * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair + * includes register 2d and 2d+1. + * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register + * of the pair contains the low 32-bit of the result. + * + * **RV64 Description**:\n + * This instruction multiplies the 32-bit unsigned elements of Rs1 with that of Rs2. + * It adds the 64-bit multiplication results to the 64-bit unsigned data in Rd with unlimited precision. If + * the 64-bit addition result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is saturated to the + * range and the OV bit is set to 1. The saturated result is written back to Rd. + * + * **Operations**:\n + * ~~~ + * * RV32: + * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1); + * result = R[t_H].R[t_L] + (Rs1 * Rs2); + * if (result > (2^64)-1) { + * result = (2^64)-1; OV = 1; + * } + * R[t_H].R[t_L] = result; + * * RV64: + * // `result` has unlimited precision + * result = Rd + (Rs1.W[0] u* Rs2.W[0]) + (Rs1.W[1] u* Rs2.W[1]); + * if (result > (2^64)-1) { + * result = (2^64)-1; OV = 1; + * } + * Rd = result; + * ~~~ + * + * \param [in] t unsigned long long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long long type + */ +__STATIC_FORCEINLINE unsigned long long __RV_UKMAR64(unsigned long long t, unsigned long a, unsigned long b) +{ + __ASM volatile("ukmar64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.156. UKMAR64 ===== */ + +/* ===== Inline Function Start for 3.157. UKMSR64 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB + * \brief UKMSR64 (Unsigned Multiply and Saturating Subtract from 64-Bit Data) + * \details + * **Type**: DSP (64-bit Profile) + * + * **Syntax**:\n + * ~~~ + * UKMSR64 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the 32-bit unsigned elements in two registers and subtract the 64-bit + * multiplication results from the 64-bit unsigned data of a pair of registers (RV32) or a register (RV64). + * The result is saturated to the U64 range and written back to the pair of registers (RV32) or a register + * (RV64). + * + * **RV32 Description**:\n + * This instruction multiplies the 32-bit unsigned data of Rs1 with that of Rs2. It + * subtracts the 64-bit multiplication result from the 64-bit unsigned data of an even/odd pair of + * registers specified by Rd(4,1) with unlimited precision. If the 64-bit subtraction result is beyond the + * U64 number range (0 <= U64 <= 2^64-1), it is saturated to the range and the OV bit is set to 1. The + * saturated result is written back to the even/odd pair of registers specified by Rd(4,1). + * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair + * includes register 2d and 2d+1. + * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register + * of the pair contains the low 32-bit of the result. + * + * **RV64 Description**:\n + * This instruction multiplies the 32-bit unsigned elements of Rs1 with that of Rs2. + * It subtracts the 64-bit multiplication results from the 64-bit unsigned data of Rd with unlimited + * precision. If the 64-bit subtraction result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is + * saturated to the range and the OV bit is set to 1. The saturated result is written back to Rd. + * + * **Operations**:\n + * ~~~ + * * RV32: + * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1); + * result = R[t_H].R[t_L] - (Rs1 u* Rs2); + * if (result < 0) { + * result = 0; OV = 1; + * } + * R[t_H].R[t_L] = result; + * * RV64: + * // `result` has unlimited precision + * result = Rd - (Rs1.W[0] u* Rs2.W[0]) - (Rs1.W[1] u* Rs2.W[1]); + * if (result < 0) { + * result = 0; OV = 1; + * } + * Rd = result; + * ~~~ + * + * \param [in] t unsigned long long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long long type + */ +__STATIC_FORCEINLINE unsigned long long __RV_UKMSR64(unsigned long long t, unsigned long a, unsigned long b) +{ + __ASM volatile("ukmsr64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.157. UKMSR64 ===== */ + +/* ===== Inline Function Start for 3.158. UKSTAS16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB + * \brief UKSTAS16 (SIMD 16-bit Unsigned Saturating Straight Addition & Subtraction) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * UKSTAS16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do one 16-bit unsigned integer element saturating addition and one 16-bit unsigned + * integer element saturating subtraction in a 32-bit chunk simultaneously. Operands are from + * corresponding positions in 32-bit chunks. + * + * **Description**:\n + * This instruction adds the 16-bit unsigned integer element in [31:16] of 32-bit chunks in + * Rs1 with the 16-bit unsigned integer element in [31:16] of 32-bit chunks in Rs2; at the same time, it + * subtracts the 16-bit unsigned integer element in [15:0] of 32-bit chunks in Rs2 from the 16-bit + * unsigned integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the 16-bit + * unsigned number range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set to 1. + * The saturated results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit + * chunks in Rd for subtraction. + * + * **Operations**:\n + * ~~~ + * res1 = Rs1.W[x][31:16] + Rs2.W[x][31:16]; + * res2 = Rs1.W[x][15:0] - Rs2.W[x][15:0]; + * if (res1 > (2^16)-1) { + * res1 = (2^16)-1; + * OV = 1; + * } + * if (res2 < 0) { + * res2 = 0; + * OV = 1; + * } + * Rd.W[x][31:16] = res1; + * Rd.W[x][15:0] = res2; + * for RV32, x=0 + * for RV64, x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_UKSTAS16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("ukstas16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.158. UKSTAS16 ===== */ + +/* ===== Inline Function Start for 3.159. UKSTSA16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB + * \brief UKSTSA16 (SIMD 16-bit Unsigned Saturating Straight Subtraction & Addition) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * UKSTSA16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do one 16-bit unsigned integer element saturating subtraction and one 16-bit unsigned + * integer element saturating addition in a 32-bit chunk simultaneously. Operands are from + * corresponding positions in 32-bit chunks. + * + * **Description**:\n + * This instruction subtracts the 16-bit unsigned integer element in [31:16] of 32-bit + * chunks in Rs2 from the 16-bit unsigned integer element in [31:16] of 32-bit chunks in Rs1; at the + * same time, it adds the 16-bit unsigned integer element in [15:0] of 32-bit chunks in Rs2 with the 16- + * bit unsigned integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the + * 16-bit unsigned number range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set + * to 1. The saturated results are written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of + * 32-bit chunks in Rd for addition. + * + * **Operations**:\n + * ~~~ + * res1 = Rs1.W[x][31:16] - Rs2.W[x][31:16]; + * res2 = Rs1.W[x][15:0] + Rs2.W[x][15:0]; + * if (res1 < 0) { + * res1 = 0; + * OV = 1; + * } else if (res2 > (2^16)-1) { + * res2 = (2^16)-1; + * OV = 1; + * } + * Rd.W[x][31:16] = res1; + * Rd.W[x][15:0] = res2; + * for RV32, x=0 + * for RV64, x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_UKSTSA16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("ukstsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.159. UKSTSA16 ===== */ + +/* ===== Inline Function Start for 3.160. UKSUB8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB + * \brief UKSUB8 (SIMD 8-bit Unsigned Saturating Subtraction) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * UKSUB8 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 8-bit unsigned integer elements saturating subtractions simultaneously. + * + * **Description**:\n + * This instruction subtracts the 8-bit unsigned integer elements in Rs2 from the 8-bit + * unsigned integer elements in Rs1. If any of the results are beyond the 8-bit unsigned number range + * (0 <= RES <= 28-1), they are saturated to the range and the OV bit is set to 1. The saturated results are + * written to Rd. + * + * **Operations**:\n + * ~~~ + * res[x] = Rs1.B[x] - Rs2.B[x]; + * if (res[x] < 0) { + * res[x] = 0; + * OV = 1; + * } + * Rd.B[x] = res[x]; + * for RV32: x=3...0, + * for RV64: x=7...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_UKSUB8(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("uksub8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.160. UKSUB8 ===== */ + +/* ===== Inline Function Start for 3.161. UKSUB16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB + * \brief UKSUB16 (SIMD 16-bit Unsigned Saturating Subtraction) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * UKSUB16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 16-bit unsigned integer elements saturating subtractions simultaneously. + * + * **Description**:\n + * This instruction subtracts the 16-bit unsigned integer elements in Rs2 from the 16-bit + * unsigned integer elements in Rs1. If any of the results are beyond the 16-bit unsigned number + * range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set to 1. The saturated + * results are written to Rd. + * + * **Operations**:\n + * ~~~ + * res[x] = Rs1.H[x] - Rs2.H[x]; + * if (res[x] < 0) { + * res[x] = 0; + * OV = 1; + * } + * Rd.H[x] = res[x]; + * for RV32: x=1...0, + * for RV64: x=3...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_UKSUB16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("uksub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.161. UKSUB16 ===== */ + +/* ===== Inline Function Start for 3.162. UKSUB64 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB + * \brief UKSUB64 (64-bit Unsigned Saturating Subtraction) + * \details + * **Type**: DSP (64-bit Profile) + * + * **Syntax**:\n + * ~~~ + * UKSUB64 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Perform a 64-bit signed integer subtraction. The result is saturated to the U64 range. + * + * **RV32 Description**:\n + * This instruction subtracts the 64-bit unsigned integer of an even/odd pair of + * registers specified by Rs2(4,1) from the 64-bit unsigned integer of an even/odd pair of registers + * specified by Rs1(4,1). If the 64-bit result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is + * saturated to the range and the OV bit is set to 1. The saturated result is then written to an even/odd + * pair of registers specified by Rd(4,1). + * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair + * includes register 2d and 2d+1. + * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d` + * register of the pair contains the low 32-bit of the operand. + * + * **RV64 Description**:\n + * This instruction subtracts the 64-bit unsigned integer of Rs2 from the 64-bit + * unsigned integer of an even/odd pair of Rs1. If the 64-bit result is beyond the U64 number range (0 <= + * U64 <= 2^64-1), it is saturated to the range and the OV bit is set to 1. The saturated result is then written + * to Rd. + * + * **Operations**:\n + * ~~~ + * * RV32: + * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1); + * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1); + * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1); + * result = R[a_H].R[a_L] - R[b_H].R[b_L]; + * if (result < 0) { + * result = 0; OV = 1; + * } + * R[t_H].R[t_L] = result; + * * RV64 + * result = Rs1 - Rs2; + * if (result < 0) { + * result = 0; OV = 1; + * } + * Rd = result; + * ~~~ + * + * \param [in] a unsigned long long type of value stored in a + * \param [in] b unsigned long long type of value stored in b + * \return value stored in unsigned long long type + */ +__STATIC_FORCEINLINE unsigned long long __RV_UKSUB64(unsigned long long a, unsigned long long b) +{ + register unsigned long long result; + __ASM volatile("uksub64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.162. UKSUB64 ===== */ + +/* ===== Inline Function Start for 3.163. UKSUBH ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU + * \brief UKSUBH (Unsigned Subtraction with U16 Saturation) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * UKSUBH Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Subtract the unsigned lower 32-bit content of two registers with U16 saturation. + * + * **Description**:\n + * The unsigned lower 32-bit content of Rs2 is subtracted from the unsigned lower 32-bit + * content of Rs1. And the result is saturated to the 16-bit unsigned integer range of [0, 2^16-1] and then + * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag. + * + * **Operations**:\n + * ~~~ + * tmp = Rs1.W[0] - Rs2.W[0]; + * if (tmp > (2^16)-1) { + * tmp = (2^16)-1; + * OV = 1; + * } + * else if (tmp < 0) { + * tmp = 0; + * OV = 1; + * } + * Rd = SE(tmp[15:0]); + * ~~~ + * + * \param [in] a unsigned int type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_UKSUBH(unsigned int a, unsigned int b) +{ + register unsigned long result; + __ASM volatile("uksubh %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.163. UKSUBH ===== */ + +/* ===== Inline Function Start for 3.164. UKSUBW ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU + * \brief UKSUBW (Unsigned Subtraction with U32 Saturation) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * UKSUBW Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Subtract the unsigned lower 32-bit content of two registers with unsigned 32-bit + * saturation. + * + * **Description**:\n + * The unsigned lower 32-bit content of Rs2 is subtracted from the unsigned lower 32-bit + * content of Rs1. And the result is saturated to the 32-bit unsigned integer range of [0, 2^32-1] and then + * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag. + * + * **Operations**:\n + * ~~~ + * tmp = Rs1.W[0] - Rs2.W[0]; + * if (tmp < 0) { + * tmp[31:0] = 0; + * OV = 1; + * } + * Rd = tmp[31:0]; // RV32 + * Rd = SE(tmp[31:0]); // RV64 + * ~~~ + * + * \param [in] a unsigned int type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_UKSUBW(unsigned int a, unsigned int b) +{ + register unsigned long result; + __ASM volatile("uksubw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.164. UKSUBW ===== */ + +/* ===== Inline Function Start for 3.165. UMAR64 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB + * \brief UMAR64 (Unsigned Multiply and Add to 64-Bit Data) + * \details + * **Type**: DSP (64-bit Profile) + * + * **Syntax**:\n + * ~~~ + * UMAR64 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the 32-bit unsigned elements in two registers and add the 64-bit multiplication + * results to the 64-bit unsigned data of a pair of registers (RV32) or a register (RV64). The result is + * written back to the pair of registers (RV32) or a register (RV64). + * + * **RV32 Description**:\n + * This instruction multiplies the 32-bit unsigned data of Rs1 with that of Rs2. It + * adds the 64-bit multiplication result to the 64-bit unsigned data of an even/odd pair of registers + * specified by Rd(4,1). The addition result is written back to the even/odd pair of registers specified by + * Rd(4,1). + * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair + * includes register 2d and 2d+1. + * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register + * of the pair contains the low 32-bit of the result. + * + * **RV64 Description**:\n + * This instruction multiplies the 32-bit unsigned elements of Rs1 with that of Rs2. + * It adds the 64-bit multiplication results to the 64-bit unsigned data of Rd. The addition result is + * written back to Rd. + * + * **Operations**:\n + * ~~~ + * * RV32: + * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1); + * R[t_H].R[t_L] = R[t_H].R[t_L] + (Rs1 * Rs2); + * * RV64: + * Rd = Rd + (Rs1.W[0] u* Rs2.W[0]) + (Rs1.W[1] u* Rs2.W[1]); + * ~~~ + * + * \param [in] t unsigned long long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long long type + */ +__STATIC_FORCEINLINE unsigned long long __RV_UMAR64(unsigned long long t, unsigned long a, unsigned long b) +{ + __ASM volatile("umar64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.165. UMAR64 ===== */ + +/* ===== Inline Function Start for 3.166. UMAQA ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_8B_MULT_32B_ADD + * \brief UMAQA (Unsigned Multiply Four Bytes with 32- bit Adds) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * UMAQA Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do four unsigned 8-bit multiplications from 32-bit chunks of two registers; and then adds + * the four 16-bit results and the content of corresponding 32-bit chunks of a third register together. + * + * **Description**:\n + * This instruction multiplies the four unsigned 8-bit elements of 32-bit chunks of Rs1 with the four + * unsigned 8-bit elements of 32-bit chunks of Rs2 and then adds the four results together with the + * unsigned content of the corresponding 32-bit chunks of Rd. The final results are written back to the + * corresponding 32-bit chunks in Rd. + * + * **Operations**:\n + * ~~~ + * res[x] = Rd.W[x] + (Rs1.W[x].B[3] u* Rs2.W[x].B[3]) + + * (Rs1.W[x].B[2] u* Rs2.W[x].B[2]) + (Rs1.W[x].B[1] u* Rs2.W[x].B[1]) + + * (Rs1.W[x].B[0] u* Rs2.W[x].B[0]); + * Rd.W[x] = res[x]; + * for RV32: x=0, + * for RV64: x=1...0 + * ~~~ + * + * \param [in] t unsigned long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_UMAQA(unsigned long t, unsigned long a, unsigned long b) +{ + __ASM volatile("umaqa %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 3.166. UMAQA ===== */ + +/* ===== Inline Function Start for 3.167. UMAX8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC + * \brief UMAX8 (SIMD 8-bit Unsigned Maximum) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * UMAX8 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 8-bit unsigned integer elements finding maximum operations simultaneously. + * + * **Description**:\n + * This instruction compares the 8-bit unsigned integer elements in Rs1 with the four 8- + * bit unsigned integer elements in Rs2 and selects the numbers that is greater than the other one. The + * two selected results are written to Rd. + * + * **Operations**:\n + * ~~~ + * Rd.B[x] = (Rs1.B[x] >u Rs2.B[x])? Rs1.B[x] : Rs2.B[x]; + * for RV32: x=3...0, + * for RV64: x=7...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_UMAX8(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("umax8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.167. UMAX8 ===== */ + +/* ===== Inline Function Start for 3.168. UMAX16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC + * \brief UMAX16 (SIMD 16-bit Unsigned Maximum) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * UMAX16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 16-bit unsigned integer elements finding maximum operations simultaneously. + * + * **Description**:\n + * This instruction compares the 16-bit unsigned integer elements in Rs1 with the 16-bit + * unsigned integer elements in Rs2 and selects the numbers that is greater than the other one. The + * selected results are written to Rd. + * + * **Operations**:\n + * ~~~ + * Rd.H[x] = (Rs1.H[x] >u Rs2.H[x])? Rs1.H[x] : Rs2.H[x]; + * for RV32: x=1...0, + * for RV64: x=3...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_UMAX16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("umax16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.168. UMAX16 ===== */ + +/* ===== Inline Function Start for 3.169. UMIN8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC + * \brief UMIN8 (SIMD 8-bit Unsigned Minimum) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * UMIN8 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 8-bit unsigned integer elements finding minimum operations simultaneously. + * + * **Description**:\n + * This instruction compares the 8-bit unsigned integer elements in Rs1 with the 8-bit + * unsigned integer elements in Rs2 and selects the numbers that is less than the other one. The + * selected results are written to Rd. + * + * **Operations**:\n + * ~~~ + * Rd.B[x] = (Rs1.B[x] > 1; + * for RV32: x=3...0, + * for RV64: x=7...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_URADD8(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("uradd8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.174. URADD8 ===== */ + +/* ===== Inline Function Start for 3.175. URADD16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB + * \brief URADD16 (SIMD 16-bit Unsigned Halving Addition) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * URADD16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 16-bit unsigned integer element additions simultaneously. The results are halved to + * avoid overflow or saturation. + * + * **Description**:\n + * This instruction adds the 16-bit unsigned integer elements in Rs1 with the 16-bit + * unsigned integer elements in Rs2. The results are first logically right-shifted by 1 bit and then + * written to Rd. + * + * **Examples**:\n + * ~~~ + * * Ra = 0x7FFF, Rb = 0x7FFF Rt = 0x7FFF + * * Ra = 0x8000, Rb = 0x8000 Rt = 0x8000 + * * Ra = 0x4000, Rb = 0x8000 Rt = 0x6000 + * ~~~ + * + * **Operations**:\n + * ~~~ + * Rd.H[x] = (Rs1.H[x] + Rs2.H[x]) u>> 1; + * for RV32: x=1...0, + * for RV64: x=3...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_URADD16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("uradd16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.175. URADD16 ===== */ + +/* ===== Inline Function Start for 3.176. URADD64 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB + * \brief URADD64 (64-bit Unsigned Halving Addition) + * \details + * **Type**: DSP (64-bit Profile) + * + * **Syntax**:\n + * ~~~ + * URADD64 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Add two 64-bit unsigned integers. The result is halved to avoid overflow or saturation. + * + * **RV32 Description**:\n + * This instruction adds the 64-bit unsigned integer of an even/odd pair of registers + * specified by Rs1(4,1) with the 64-bit unsigned integer of an even/odd pair of registers specified by + * Rs2(4,1). The 64-bit addition result is first logically right-shifted by 1 bit and then written to an + * even/odd pair of registers specified by Rd(4,1). + * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair + * includes register 2d and 2d+1. + * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register + * of the pair contains the low 32-bit of the result. + * + * **RV64 Description**:\n + * This instruction adds the 64-bit unsigned integer in Rs1 with the 64-bit unsigned + * integer Rs2. The 64-bit addition result is first logically right-shifted by 1 bit and then written to Rd. + * + * **Operations**:\n + * ~~~ + * * RV32: + * t_L = CONCAT(Rt(4,1),1'b0); t_H = CONCAT(Rt(4,1),1'b1); + * a_L = CONCAT(Ra(4,1),1'b0); a_H = CONCAT(Ra(4,1),1'b1); + * b_L = CONCAT(Rb(4,1),1'b0); b_H = CONCAT(Rb(4,1),1'b1); + * R[t_H].R[t_L] = (R[a_H].R[a_L] + R[b_H].R[b_L]) u>> 1; + * * RV64: + * Rd = (Rs1 + Rs2) u>> 1; + * ~~~ + * + * \param [in] a unsigned long long type of value stored in a + * \param [in] b unsigned long long type of value stored in b + * \return value stored in unsigned long long type + */ +__STATIC_FORCEINLINE unsigned long long __RV_URADD64(unsigned long long a, unsigned long long b) +{ + register unsigned long long result; + __ASM volatile("uradd64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.176. URADD64 ===== */ + +/* ===== Inline Function Start for 3.177. URADDW ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION + * \brief URADDW (32-bit Unsigned Halving Addition) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * URADDW Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Add 32-bit unsigned integers and the results are halved to avoid overflow or saturation. + * + * **Description**:\n + * This instruction adds the first 32-bit unsigned integer in Rs1 with the first 32-bit + * unsigned integer in Rs2. The result is first logically right-shifted by 1 bit and then sign-extended and + * written to Rd. + * + * **Examples**:\n + * ~~~ + * * Ra = 0x7FFFFFFF, Rb = 0x7FFFFFFF Rt = 0x7FFFFFFF + * * Ra = 0x80000000, Rb = 0x80000000 Rt = 0x80000000 + * * Ra = 0x40000000, Rb = 0x80000000 Rt = 0x60000000 + * ~~~ + * + * **Operations**:\n + * ~~~ + * * RV32: + * Rd[31:0] = (Rs1[31:0] + Rs2[31:0]) u>> 1; + * * RV64: + * resw[31:0] = (Rs1[31:0] + Rs2[31:0]) u>> 1; + * Rd[63:0] = SE(resw[31:0]); + * ~~~ + * + * \param [in] a unsigned int type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_URADDW(unsigned int a, unsigned int b) +{ + register unsigned long result; + __ASM volatile("uraddw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.177. URADDW ===== */ + +/* ===== Inline Function Start for 3.178. URCRAS16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB + * \brief URCRAS16 (SIMD 16-bit Unsigned Halving Cross Addition & Subtraction) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * URCRAS16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 16-bit unsigned integer element addition and 16-bit unsigned integer element + * subtraction in a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit chunks. + * The results are halved to avoid overflow or saturation. + * + * **Description**:\n + * This instruction adds the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs1 + * with the 16-bit unsigned integer in [15:0] of 32-bit chunks in Rs2, and subtracts the 16-bit unsigned + * integer in [31:16] of 32-bit chunks in Rs2 from the 16-bit unsigned integer in [15:0] of 32-bit chunks + * in Rs1. The element results are first logically right-shifted by 1 bit and then written to [31:16] of 32- + * bit chunks in Rd and [15:0] of 32-bit chunks in Rd. + * + * **Examples**:\n + * ~~~ + * Please see `URADD16` and `URSUB16` instructions. + * ~~~ + * + * **Operations**:\n + * ~~~ + * Rd.W[x][31:16] = (Rs1.W[x][31:16] + Rs2.W[x][15:0]) u>> 1; + * Rd.W[x][15:0] = (Rs1.W[x][15:0] - Rs2.W[x][31:16]) u>> 1; + * for RV32, x=0 + * for RV64, x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_URCRAS16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("urcras16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.178. URCRAS16 ===== */ + +/* ===== Inline Function Start for 3.179. URCRSA16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB + * \brief URCRSA16 (SIMD 16-bit Unsigned Halving Cross Subtraction & Addition) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * URCRSA16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 16-bit unsigned integer element subtraction and 16-bit unsigned integer element + * addition in a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit chunks. + * The results are halved to avoid overflow or saturation. + * + * **Description**:\n + * This instruction subtracts the 16-bit unsigned integer in [15:0] of 32-bit chunks in Rs2 + * from the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs1, and adds the 16-bit unsigned + * integer in [15:0] of 32-bit chunks in Rs1 with the 16-bit unsigned integer in [31:16] of 32-bit chunks + * in Rs2. The two results are first logically right-shifted by 1 bit and then written to [31:16] of 32-bit + * chunks in Rd and [15:0] of 32-bit chunks in Rd. + * + * **Examples**:\n + * ~~~ + * Please see `URADD16` and `URSUB16` instructions. + * ~~~ + * + * **Operations**:\n + * ~~~ + * Rd.W[x][31:16] = (Rs1.W[x][31:16] - Rs2.W[x][15:0]) u>> 1; + * Rd.W[x][15:0] = (Rs1.W[x][15:0] + Rs2.W[x][31:16]) u>> 1; + * for RV32, x=0 + * for RV64, x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_URCRSA16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("urcrsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.179. URCRSA16 ===== */ + +/* ===== Inline Function Start for 3.180. URSTAS16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB + * \brief URSTAS16 (SIMD 16-bit Unsigned Halving Straight Addition & Subtraction) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * URSTAS16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 16-bit unsigned integer element addition and 16-bit unsigned integer element + * subtraction in a 32-bit chunk simultaneously. Operands are from corresponding positions in 32-bit + * chunks. The results are halved to avoid overflow or saturation. + * + * **Description**:\n + * This instruction adds the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs1 + * with the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs2, and subtracts the 16-bit unsigned + * integer in [15:0] of 32-bit chunks in Rs2 from the 16-bit unsigned integer in [15:0] of 32-bit chunks + * in Rs1. The element results are first logically right-shifted by 1 bit and then written to [31:16] of 32- + * bit chunks in Rd and [15:0] of 32-bit chunks in Rd. + * + * **Examples**:\n + * ~~~ + * Please see `URADD16` and `URSUB16` instructions. + * ~~~ + * + * **Operations**:\n + * ~~~ + * Rd.W[x][31:16] = (Rs1.W[x][31:16] + Rs2.W[x][31:16]) u>> 1; + * Rd.W[x][15:0] = (Rs1.W[x][15:0] - Rs2.W[x][15:0]) u>> 1; + * for RV32, x=0 + * for RV64, x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_URSTAS16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("urstas16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.180. URSTAS16 ===== */ + +/* ===== Inline Function Start for 3.181. URSTSA16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB + * \brief URSTSA16 (SIMD 16-bit Unsigned Halving Straight Subtraction & Addition) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * URCRSA16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 16-bit unsigned integer element subtraction and 16-bit unsigned integer element + * addition in a 32-bit chunk simultaneously. Operands are from corresponding positions in 32-bit + * chunks. The results are halved to avoid overflow or saturation. + * + * **Description**:\n + * This instruction subtracts the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs2 + * from the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs1, and adds the 16-bit unsigned + * integer in [15:0] of 32-bit chunks in Rs1 with the 16-bit unsigned integer in [15:0] of 32-bit chunks in + * Rs2. The two results are first logically right-shifted by 1 bit and then written to [31:16] of 32-bit + * chunks in Rd and [15:0] of 32-bit chunks in Rd. + * + * **Examples**:\n + * ~~~ + * Please see `URADD16` and `URSUB16` instructions. + * ~~~ + * + * **Operations**:\n + * ~~~ + * Rd.W[x][31:16] = (Rs1.W[x][31:16] - Rs2.W[x][31:16]) u>> 1; + * Rd.W[x][15:0] = (Rs1.W[x][15:0] + Rs2.W[x][15:0]) u>> 1; + * for RV32, x=0 + * for RV64, x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_URSTSA16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("urstsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.181. URSTSA16 ===== */ + +/* ===== Inline Function Start for 3.182. URSUB8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB + * \brief URSUB8 (SIMD 8-bit Unsigned Halving Subtraction) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * URSUB8 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 8-bit unsigned integer element subtractions simultaneously. The results are halved to + * avoid overflow or saturation. + * + * **Description**:\n + * This instruction subtracts the 8-bit unsigned integer elements in Rs2 from the 8-bit + * unsigned integer elements in Rs1. The results are first logically right-shifted by 1 bit and then + * written to Rd. + * + * **Examples**:\n + * ~~~ + * * Ra = 0x7F, Rb = 0x80 Rt = 0xFF + * * Ra = 0x80, Rb = 0x7F Rt = 0x00 + * * Ra = 0x80, Rb = 0x40 Rt = 0x20 + * ~~~ + * + * **Operations**:\n + * ~~~ + * Rd.B[x] = (Rs1.B[x] - Rs2.B[x]) u>> 1; + * for RV32: x=3...0, + * for RV64: x=7...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_URSUB8(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("ursub8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.182. URSUB8 ===== */ + +/* ===== Inline Function Start for 3.183. URSUB16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB + * \brief URSUB16 (SIMD 16-bit Unsigned Halving Subtraction) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * URSUB16 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 16-bit unsigned integer element subtractions simultaneously. The results are halved to + * avoid overflow or saturation. + * + * **Description**:\n + * This instruction subtracts the 16-bit unsigned integer elements in Rs2 from the 16-bit + * unsigned integer elements in Rs1. The results are first logically right-shifted by 1 bit and then + * written to Rd. + * + * **Examples**:\n + * ~~~ + * * Ra = 0x7FFF, Rb = 0x8000 Rt = 0xFFFF + * * Ra = 0x8000, Rb = 0x7FFF Rt = 0x0000 + * * Ra = 0x8000, Rb = 0x4000 Rt = 0x2000 + * ~~~ + * + * **Operations**:\n + * ~~~ + * Rd.H[x] = (Rs1.H[x] - Rs2.H[x]) u>> 1; + * for RV32: x=1...0, + * for RV64: x=3...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_URSUB16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("ursub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.183. URSUB16 ===== */ + +/* ===== Inline Function Start for 3.184. URSUB64 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB + * \brief URSUB64 (64-bit Unsigned Halving Subtraction) + * \details + * **Type**: DSP (64-bit Profile) + * + * **Syntax**:\n + * ~~~ + * URSUB64 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Perform a 64-bit unsigned integer subtraction. The result is halved to avoid overflow or + * saturation. + * + * **RV32 Description**:\n + * This instruction subtracts the 64-bit unsigned integer of an even/odd pair of + * registers specified by Rs2(4,1) from the 64-bit unsigned integer of an even/odd pair of registers + * specified by Rs1(4,1). The subtraction result is first logically right-shifted by 1 bit and then written + * to an even/odd pair of registers specified by Rd(4,1). + * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair + * includes register 2d and 2d+1. + * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register + * of the pair contains the low 32-bit of the result. + * + * **RV64 Description**:\n + * This instruction subtracts the 64-bit unsigned integer in Rs2 from the 64-bit + * unsigned integer in Rs1. The subtraction result is first logically right-shifted by 1 bit and then + * written to Rd. + * + * **Operations**:\n + * ~~~ + * * RV32: + * t_L = CONCAT(Rt(4,1),1'b0); t_H = CONCAT(Rt(4,1),1'b1); + * a_L = CONCAT(Ra(4,1),1'b0); a_H = CONCAT(Ra(4,1),1'b1); + * b_L = CONCAT(Rb(4,1),1'b0); b_H = CONCAT(Rb(4,1),1'b1); + * R[t_H].R[t_L] = (R[a_H].R[a_L] - R[b_H].R[b_L]) u>> 1; + * * RV64: + * Rd = (Rs1 - Rs2) u>> 1; + * ~~~ + * + * \param [in] a unsigned long long type of value stored in a + * \param [in] b unsigned long long type of value stored in b + * \return value stored in unsigned long long type + */ +__STATIC_FORCEINLINE unsigned long long __RV_URSUB64(unsigned long long a, unsigned long long b) +{ + register unsigned long long result; + __ASM volatile("ursub64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.184. URSUB64 ===== */ + +/* ===== Inline Function Start for 3.185. URSUBW ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION + * \brief URSUBW (32-bit Unsigned Halving Subtraction) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * URSUBW Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Subtract 32-bit unsigned integers and the result is halved to avoid overflow or saturation. + * + * **Description**:\n + * This instruction subtracts the first 32-bit signed integer in Rs2 from the first 32-bit + * signed integer in Rs1. The result is first logically right-shifted by 1 bit and then sign-extended and + * written to Rd. + * + * **Examples**:\n + * ~~~ + * * Ra = 0x7FFFFFFF, Rb = 0x80000000 Rt = 0xFFFFFFFF + * * Ra = 0x80000000, Rb = 0x7FFFFFFF Rt = 0x00000000 + * * Ra = 0x80000000, Rb = 0x40000000 Rt = 0x20000000 + * ~~~ + * + * **Operations**:\n + * ~~~ + * * RV32: + * Rd[31:0] = (Rs1[31:0] - Rs2[31:0]) u>> 1; + * * RV64: + * resw[31:0] = (Rs1[31:0] - Rs2[31:0]) u>> 1; + * Rd[63:0] = SE(resw[31:0]); + * ~~~ + * + * \param [in] a unsigned int type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_URSUBW(unsigned int a, unsigned int b) +{ + register unsigned long result; + __ASM volatile("ursubw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.185. URSUBW ===== */ + +/* ===== Inline Function Start for 3.186. WEXTI ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC + * \brief WEXTI (Extract Word from 64-bit Immediate) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * WEXTI Rd, Rs1, #LSBloc + * ~~~ + * + * **Purpose**:\n + * Extract a 32-bit word from a 64-bit value stored in an even/odd pair of registers (RV32) or + * a register (RV64) starting from a specified immediate LSB bit position. + * + * **RV32 Description**:\n + * This instruction extracts a 32-bit word from a 64-bit value of an even/odd pair of registers specified + * by Rs1(4,1) starting from a specified immediate LSB bit position, #LSBloc. The extracted word is + * written to Rd. + * Rs1(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register + * pair includes register 2d and 2d+1. + * The odd `2d+1` register of the pair contains the high 32-bit of the 64-bit value and the even `2d` + * register of the pair contains the low 32-bit of the 64-bit value. + * + * **RV64 Description**:\n + * This instruction extracts a 32-bit word from a 64-bit value in Rs1 starting from a specified + * immediate LSB bit position, #LSBloc. The extracted word is sign-extended and written to lower 32- + * bit of Rd. + * + * **Operations**:\n + * ~~~ + * * RV32: + * Idx0 = CONCAT(Rs1(4,1),1'b0); Idx1 = CONCAT(Rs2(4,1),1'b1); + * src[63:0] = Concat(R[Idx1], R[Idx0]); + * Rd = src[31+LSBloc:LSBloc]; + * * RV64: + * ExtractW = Rs1[31+LSBloc:LSBloc]; + * Rd = SE(ExtractW) + * ~~~ + * + * \param [in] a long long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +#define __RV_WEXTI(a, b) \ + ({ \ + register unsigned long result; \ + register long long __a = (long long)(a); \ + __ASM volatile("wexti %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \ + result; \ + }) +/* ===== Inline Function End for 3.186. WEXTI ===== */ + +/* ===== Inline Function Start for 3.187. WEXT ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC + * \brief WEXT (Extract Word from 64-bit) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * WEXT Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Extract a 32-bit word from a 64-bit value stored in an even/odd pair of registers (RV32) or + * a register (RV64) starting from a specified LSB bit position in a register. + * + * **RV32 Description**:\n + * This instruction extracts a 32-bit word from a 64-bit value of an even/odd pair of registers specified + * by Rs1(4,1) starting from a specified LSB bit position, specified in Rs2[4:0]. The extracted word is + * written to Rd. + * Rs1(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register + * pair includes register 2d and 2d+1. + * The odd `2d+1` register of the pair contains the high 32-bit of the 64-bit value and the even `2d` + * register of the pair contains the low 32-bit of the 64-bit value. + * + * **Operations**:\n + * ~~~ + * * RV32: + * Idx0 = CONCAT(Rs1(4,1),1'b0); Idx1 = CONCAT(Rs1(4,1),1'b1); + * src[63:0] = Concat(R[Idx1], R[Idx0]); + * LSBloc = Rs2[4:0]; + * Rd = src[31+LSBloc:LSBloc]; + * * RV64: + * LSBloc = Rs2[4:0]; + * ExtractW = Rs1[31+LSBloc:LSBloc]; + * Rd = SE(ExtractW) + * ~~~ + * + * \param [in] a long long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_WEXT(long long a, unsigned int b) +{ + register unsigned long result; + __ASM volatile("wext %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 3.187. WEXT ===== */ + +/* ===== Inline Function Start for 3.188.1. ZUNPKD810 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK + * \brief ZUNPKD810 (Unsigned Unpacking Bytes 1 & 0) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * ZUNPKD8xy Rd, Rs1 + * xy = {10, 20, 30, 31, 32} + * ~~~ + * + * **Purpose**:\n + * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned + * halfwords of 32-bit chunks in a register. + * + * **Description**:\n + * For the `ZUNPKD8(x)(*y*)` instruction, it unpacks byte *x and byte y* of 32-bit chunks in Rs1 into + * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit + * chunks in Rd. + * + * **Operations**:\n + * ~~~ + * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x]) + * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y]) + * // ZUNPKD810, x=1,y=0 + * // ZUNPKD820, x=2,y=0 + * // ZUNPKD830, x=3,y=0 + * // ZUNPKD831, x=3,y=1 + * // ZUNPKD832, x=3,y=2 + * for RV32: m=0, + * for RV64: m=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_ZUNPKD810(unsigned long a) +{ + register unsigned long result; + __ASM volatile("zunpkd810 %0, %1" : "=r"(result) : "r"(a)); + return result; +} +/* ===== Inline Function End for 3.188.1. ZUNPKD810 ===== */ + +/* ===== Inline Function Start for 3.188.2. ZUNPKD820 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK + * \brief ZUNPKD820 (Unsigned Unpacking Bytes 2 & 0) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * ZUNPKD8xy Rd, Rs1 + * xy = {10, 20, 30, 31, 32} + * ~~~ + * + * **Purpose**:\n + * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned + * halfwords of 32-bit chunks in a register. + * + * **Description**:\n + * For the `ZUNPKD8(x)(*y*)` instruction, it unpacks byte *x and byte y* of 32-bit chunks in Rs1 into + * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit + * chunks in Rd. + * + * **Operations**:\n + * ~~~ + * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x]) + * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y]) + * // ZUNPKD810, x=1,y=0 + * // ZUNPKD820, x=2,y=0 + * // ZUNPKD830, x=3,y=0 + * // ZUNPKD831, x=3,y=1 + * // ZUNPKD832, x=3,y=2 + * for RV32: m=0, + * for RV64: m=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_ZUNPKD820(unsigned long a) +{ + register unsigned long result; + __ASM volatile("zunpkd820 %0, %1" : "=r"(result) : "r"(a)); + return result; +} +/* ===== Inline Function End for 3.188.2. ZUNPKD820 ===== */ + +/* ===== Inline Function Start for 3.188.3. ZUNPKD830 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK + * \brief ZUNPKD830 (Unsigned Unpacking Bytes 3 & 0) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * ZUNPKD8xy Rd, Rs1 + * xy = {10, 20, 30, 31, 32} + * ~~~ + * + * **Purpose**:\n + * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned + * halfwords of 32-bit chunks in a register. + * + * **Description**:\n + * For the `ZUNPKD8(x)(*y*)` instruction, it unpacks byte *x and byte y* of 32-bit chunks in Rs1 into + * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit + * chunks in Rd. + * + * **Operations**:\n + * ~~~ + * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x]) + * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y]) + * // ZUNPKD810, x=1,y=0 + * // ZUNPKD820, x=2,y=0 + * // ZUNPKD830, x=3,y=0 + * // ZUNPKD831, x=3,y=1 + * // ZUNPKD832, x=3,y=2 + * for RV32: m=0, + * for RV64: m=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_ZUNPKD830(unsigned long a) +{ + register unsigned long result; + __ASM volatile("zunpkd830 %0, %1" : "=r"(result) : "r"(a)); + return result; +} +/* ===== Inline Function End for 3.188.3. ZUNPKD830 ===== */ + +/* ===== Inline Function Start for 3.188.4. ZUNPKD831 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK + * \brief ZUNPKD831 (Unsigned Unpacking Bytes 3 & 1) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * ZUNPKD8xy Rd, Rs1 + * xy = {10, 20, 30, 31, 32} + * ~~~ + * + * **Purpose**:\n + * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned + * halfwords of 32-bit chunks in a register. + * + * **Description**:\n + * For the `ZUNPKD8(x)(*y*)` instruction, it unpacks byte *x and byte y* of 32-bit chunks in Rs1 into + * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit + * chunks in Rd. + * + * **Operations**:\n + * ~~~ + * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x]) + * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y]) + * // ZUNPKD810, x=1,y=0 + * // ZUNPKD820, x=2,y=0 + * // ZUNPKD830, x=3,y=0 + * // ZUNPKD831, x=3,y=1 + * // ZUNPKD832, x=3,y=2 + * for RV32: m=0, + * for RV64: m=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_ZUNPKD831(unsigned long a) +{ + register unsigned long result; + __ASM volatile("zunpkd831 %0, %1" : "=r"(result) : "r"(a)); + return result; +} +/* ===== Inline Function End for 3.188.4. ZUNPKD831 ===== */ + +/* ===== Inline Function Start for 3.188.5. ZUNPKD832 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK + * \brief ZUNPKD832 (Unsigned Unpacking Bytes 3 & 2) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * ZUNPKD8xy Rd, Rs1 + * xy = {10, 20, 30, 31, 32} + * ~~~ + * + * **Purpose**:\n + * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned + * halfwords of 32-bit chunks in a register. + * + * **Description**:\n + * For the `ZUNPKD8(x)(*y*)` instruction, it unpacks byte *x and byte y* of 32-bit chunks in Rs1 into + * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit + * chunks in Rd. + * + * **Operations**:\n + * ~~~ + * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x]) + * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y]) + * // ZUNPKD810, x=1,y=0 + * // ZUNPKD820, x=2,y=0 + * // ZUNPKD830, x=3,y=0 + * // ZUNPKD831, x=3,y=1 + * // ZUNPKD832, x=3,y=2 + * for RV32: m=0, + * for RV64: m=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_ZUNPKD832(unsigned long a) +{ + register unsigned long result; + __ASM volatile("zunpkd832 %0, %1" : "=r"(result) : "r"(a)); + return result; +} +/* ===== Inline Function End for 3.188.5. ZUNPKD832 ===== */ + +#if (__RISCV_XLEN == 64) || defined(__ONLY_FOR_DOXYGEN_DOCUMENT_GENERATION__) + +/* ===== Inline Function Start for 4.1. ADD32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB + * \brief ADD32 (SIMD 32-bit Addition) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * ADD32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 32-bit integer element additions simultaneously. + * + * **Description**:\n + * This instruction adds the 32-bit integer elements in Rs1 with the 32-bit integer + * elements in Rs2, and then writes the 32-bit element results to Rd. + * + * **Note**:\n + * This instruction can be used for either signed or unsigned addition. + * + * **Operations**:\n + * ~~~ + * Rd.W[x] = Rs1.W[x] + Rs2.W[x]; + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_ADD32(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("add32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.1. ADD32 ===== */ + +/* ===== Inline Function Start for 4.2. CRAS32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB + * \brief CRAS32 (SIMD 32-bit Cross Addition & Subtraction) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * CRAS32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 32-bit integer element addition and 32-bit integer element subtraction in a 64-bit + * chunk simultaneously. Operands are from crossed 32-bit elements. + * + * **Description**:\n + * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit + * integer element in [31:0] of Rs2, and writes the result to [63:32] of Rd; at the same time, it subtracts + * the 32-bit integer element in [63:32] of Rs2 from the 32-bit integer element in [31:0] of Rs1, and + * writes the result to [31:0] of Rd. + * + * **Note**:\n + * This instruction can be used for either signed or unsigned operations. + * + * **Operations**:\n + * ~~~ + * Rd.W[1] = Rs1.W[1] + Rs2.W[0]; + * Rd.W[0] = Rs1.W[0] - Rs2.W[1]; + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_CRAS32(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("cras32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.2. CRAS32 ===== */ + +/* ===== Inline Function Start for 4.3. CRSA32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB + * \brief CRSA32 (SIMD 32-bit Cross Subtraction & Addition) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * CRSA32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 32-bit integer element subtraction and 32-bit integer element addition in a 64-bit + * chunk simultaneously. Operands are from crossed 32-bit elements. + * *Description: * + * This instruction subtracts the 32-bit integer element in [31:0] of Rs2 from the 32-bit integer element + * in [63:32] of Rs1, and writes the result to [63:32] of Rd; at the same time, it adds the 32-bit integer + * element in [31:0] of Rs1 with the 32-bit integer element in [63:32] of Rs2, and writes the result to + * [31:0] of Rd + * + * **Note**:\n + * This instruction can be used for either signed or unsigned operations. + * + * **Operations**:\n + * ~~~ + * Rd.W[1] = Rs1.W[1] - Rs2.W[0]; + * Rd.W[0] = Rs1.W[0] + Rs2.W[1]; + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_CRSA32(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("crsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.3. CRSA32 ===== */ + +/* ===== Inline Function Start for 4.4. KABS32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC + * \brief KABS32 (Scalar 32-bit Absolute Value with Saturation) + * \details + * **Type**: DSP (RV64 Only) +24 20 +19 15 +14 12 +11 7 +KABS32 +10010 +Rs1 +000 +Rd +6 0 +GE80B +1111111 + * + * **Syntax**:\n + * ~~~ + * KABS32 Rd, Rs1 + * ~~~ + * + * **Purpose**:\n + * Get the absolute value of signed 32-bit integer elements in a general register. + * + * **Description**:\n + * This instruction calculates the absolute value of signed 32-bit integer elements stored + * in Rs1. The results are written to Rd. This instruction with the minimum negative integer input of + * 0x80000000 will produce a saturated output of maximum positive integer of 0x7fffffff and the OV + * flag will be set to 1. + * + * **Operations**:\n + * ~~~ + * if (Rs1.W[x] >= 0) { + * res[x] = Rs1.W[x]; + * } else { + * If (Rs1.W[x] == 0x80000000) { + * res[x] = 0x7fffffff; + * OV = 1; + * } else { + * res[x] = -Rs1.W[x]; + * } + * } + * Rd.W[x] = res[x]; + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KABS32(unsigned long a) +{ + register unsigned long result; + __ASM volatile("kabs32 %0, %1" : "=r"(result) : "r"(a)); + return result; +} +/* ===== Inline Function End for 4.4. KABS32 ===== */ + +/* ===== Inline Function Start for 4.5. KADD32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB + * \brief KADD32 (SIMD 32-bit Signed Saturating Addition) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * KADD32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 32-bit signed integer element saturating additions simultaneously. + * + * **Description**:\n + * This instruction adds the 32-bit signed integer elements in Rs1 with the 32-bit signed + * integer elements in Rs2. If any of the results are beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), + * they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd. + * + * **Operations**:\n + * ~~~ + * res[x] = Rs1.W[x] + Rs2.W[x]; + * if (res[x] > (2^31)-1) { + * res[x] = (2^31)-1; + * OV = 1; + * } else if (res[x] < -2^31) { + * res[x] = -2^31; + * OV = 1; + * } + * Rd.W[x] = res[x]; + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KADD32(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("kadd32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.5. KADD32 ===== */ + +/* ===== Inline Function Start for 4.6. KCRAS32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB + * \brief KCRAS32 (SIMD 32-bit Signed Saturating Cross Addition & Subtraction) + * \details + * **Type**: SIM (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * KCRAS32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 32-bit signed integer element saturating addition and 32-bit signed integer element + * saturating subtraction in a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements. + * + * **Description**:\n + * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit + * integer element in [31:0] of Rs2; at the same time, it subtracts the 32-bit integer element in [63:32] of + * Rs2 from the 32-bit integer element in [31:0] of Rs1. If any of the results are beyond the Q31 number + * range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated + * results are written to [63:32] of Rd for addition and [31:0] of Rd for subtraction. + * + * **Operations**:\n + * ~~~ + * res[1] = Rs1.W[1] + Rs2.W[0]; + * res[0] = Rs1.W[0] - Rs2.W[1]; + * if (res[x] > (2^31)-1) { + * res[x] = (2^31)-1; + * OV = 1; + * } else if (res < -2^31) { + * res[x] = -2^31; + * OV = 1; + * } + * Rd.W[1] = res[1]; + * Rd.W[0] = res[0]; + * for RV64, x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KCRAS32(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("kcras32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.6. KCRAS32 ===== */ + +/* ===== Inline Function Start for 4.7. KCRSA32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB + * \brief KCRSA32 (SIMD 32-bit Signed Saturating Cross Subtraction & Addition) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * KCRSA32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 32-bit signed integer element saturating subtraction and 32-bit signed integer element + * saturating addition in a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements. + * *Description: * + * This instruction subtracts the 32-bit integer element in [31:0] of Rs2 from the 32-bit integer element + * in [63:32] of Rs1; at the same time, it adds the 32-bit integer element in [31:0] of Rs1 with the 32-bit + * integer element in [63:32] of Rs2. If any of the results are beyond the Q31 number range (-2^31 <= Q31 + * <= 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to + * [63:32] of Rd for subtraction and [31:0] of Rd for addition. + * + * **Operations**:\n + * ~~~ + * res[1] = Rs1.W[1] - Rs2.W[0]; + * res[0] = Rs1.W[0] + Rs2.W[1]; + * if (res[x] > (2^31)-1) { + * res[x] = (2^31)-1; + * OV = 1; + * } else if (res < -2^31) { + * res[x] = -2^31; + * OV = 1; + * } + * Rd.W[1] = res[1]; + * Rd.W[0] = res[0]; + * for RV64, x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KCRSA32(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("kcrsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.7. KCRSA32 ===== */ + +/* ===== Inline Function Start for 4.8.1. KDMBB16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT + * \brief KDMBB16 (SIMD Signed Saturating Double Multiply B16 x B16) + * \details + * **Type**: SIMD (RV64 only) + * + * **Syntax**:\n + * ~~~ + * KDMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT) + * ~~~ + * + * **Purpose**:\n + * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion + * of the 32-bit chunks in registers and then double and saturate the Q31 results into the 32-bit chunks + * in the destination register. If saturation happens, an overflow flag OV will be set. + * + * **Description**:\n + * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top + * or bottom 16-bit Q15 content of the 32-bit portions in Rs2. The Q30 results are then doubled and + * saturated into Q31 values. The Q31 values are then written into the 32-bit chunks in Rd. When both + * the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated to 0x7FFFFFFF + * and the overflow flag OV will be set. + * + * **Operations**:\n + * ~~~ + * // KDMBB16: (x,y,z)=(0,0,0),(2,2,1) + * // KDMBT16: (x,y,z)=(0,1,0),(2,3,1) + * // KDMTT16: (x,y,z)=(1,1,0),(3,3,1) + * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y]; + * If (0x8000 != aop[z] | 0x8000 != bop[z]) { + * Mresult[z] = aop[z] * bop[z]; + * resQ31[z] = Mresult[z] << 1; + * } else { + * resQ31[z] = 0x7FFFFFFF; + * OV = 1; + * } + * Rd.W[z] = resQ31[z]; + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KDMBB16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("kdmbb16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.8.1. KDMBB16 ===== */ + +/* ===== Inline Function Start for 4.8.2. KDMBT16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT + * \brief KDMBT16 (SIMD Signed Saturating Double Multiply B16 x T16) + * \details + * **Type**: SIMD (RV64 only) + * + * **Syntax**:\n + * ~~~ + * KDMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT) + * ~~~ + * + * **Purpose**:\n + * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion + * of the 32-bit chunks in registers and then double and saturate the Q31 results into the 32-bit chunks + * in the destination register. If saturation happens, an overflow flag OV will be set. + * + * **Description**:\n + * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top + * or bottom 16-bit Q15 content of the 32-bit portions in Rs2. The Q30 results are then doubled and + * saturated into Q31 values. The Q31 values are then written into the 32-bit chunks in Rd. When both + * the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated to 0x7FFFFFFF + * and the overflow flag OV will be set. + * + * **Operations**:\n + * ~~~ + * // KDMBB16: (x,y,z)=(0,0,0),(2,2,1) + * // KDMBT16: (x,y,z)=(0,1,0),(2,3,1) + * // KDMTT16: (x,y,z)=(1,1,0),(3,3,1) + * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y]; + * If (0x8000 != aop[z] | 0x8000 != bop[z]) { + * Mresult[z] = aop[z] * bop[z]; + * resQ31[z] = Mresult[z] << 1; + * } else { + * resQ31[z] = 0x7FFFFFFF; + * OV = 1; + * } + * Rd.W[z] = resQ31[z]; + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KDMBT16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("kdmbt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.8.2. KDMBT16 ===== */ + +/* ===== Inline Function Start for 4.8.3. KDMTT16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT + * \brief KDMTT16 (SIMD Signed Saturating Double Multiply T16 x T16) + * \details + * **Type**: SIMD (RV64 only) + * + * **Syntax**:\n + * ~~~ + * KDMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT) + * ~~~ + * + * **Purpose**:\n + * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion + * of the 32-bit chunks in registers and then double and saturate the Q31 results into the 32-bit chunks + * in the destination register. If saturation happens, an overflow flag OV will be set. + * + * **Description**:\n + * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top + * or bottom 16-bit Q15 content of the 32-bit portions in Rs2. The Q30 results are then doubled and + * saturated into Q31 values. The Q31 values are then written into the 32-bit chunks in Rd. When both + * the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated to 0x7FFFFFFF + * and the overflow flag OV will be set. + * + * **Operations**:\n + * ~~~ + * // KDMBB16: (x,y,z)=(0,0,0),(2,2,1) + * // KDMBT16: (x,y,z)=(0,1,0),(2,3,1) + * // KDMTT16: (x,y,z)=(1,1,0),(3,3,1) + * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y]; + * If (0x8000 != aop[z] | 0x8000 != bop[z]) { + * Mresult[z] = aop[z] * bop[z]; + * resQ31[z] = Mresult[z] << 1; + * } else { + * resQ31[z] = 0x7FFFFFFF; + * OV = 1; + * } + * Rd.W[z] = resQ31[z]; + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KDMTT16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("kdmtt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.8.3. KDMTT16 ===== */ + +/* ===== Inline Function Start for 4.9.1. KDMABB16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT + * \brief KDMABB16 (SIMD Signed Saturating Double Multiply Addition B16 x B16) + * \details + * **Type**: SIMD (RV64 only) + * + * **Syntax**:\n + * ~~~ + * KDMAxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT) + * ~~~ + * + * **Purpose**:\n + * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion + * of the 32-bit chunks in registers and then double and saturate the Q31 results, add the results with + * the values of the corresponding 32-bit chunks from the destination register and write the saturated + * addition results back into the corresponding 32-bit chunks of the destination register. If saturation + * happens, an overflow flag OV will be set. + * + * **Description**:\n + * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top + * or bottom 16-bit Q15 content of the corresponding 32-bit portions in Rs2. The Q30 results are then + * doubled and saturated into Q31 values. The Q31 values are then added with the content of the + * corresponding 32-bit portions of Rd. If the addition results are beyond the Q31 number range (-2^31 <= + * Q31 <= 2^31-1), they are saturated to the range and the OV flag is set to 1. The results after saturation + * are written back to Rd. + * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be + * set. + * + * **Operations**:\n + * ~~~ + * // KDMABB16: (x,y,z)=(0,0,0),(2,2,1) + * // KDMABT16: (x,y,z)=(0,1,0),(2,3,1) + * // KDMATT16: (x,y,z)=(1,1,0),(3,3,1) + * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y]; + * If (0x8000 != aop[z] | 0x8000 != bop[z]) { + * Mresult[z] = aop[z] * bop[z]; + * resQ31[z] = Mresult[z] << 1; + * } else { + * resQ31[z] = 0x7FFFFFFF; + * OV = 1; + * } + * resadd[z] = Rd.W[z] + resQ31[z]; + * if (resadd[z] > (2^31)-1) { + * resadd[z] = (2^31)-1; + * OV = 1; + * } else if (resadd[z] < -2^31) { + * resadd[z] = -2^31; + * OV = 1; + * } + * Rd.W[z] = resadd[z]; + * ~~~ + * + * \param [in] t unsigned long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KDMABB16(unsigned long t, unsigned long a, unsigned long b) +{ + __ASM volatile("kdmabb16 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 4.9.1. KDMABB16 ===== */ + +/* ===== Inline Function Start for 4.9.2. KDMABT16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT + * \brief KDMABT16 (SIMD Signed Saturating Double Multiply Addition B16 x T16) + * \details + * **Type**: SIMD (RV64 only) + * + * **Syntax**:\n + * ~~~ + * KDMAxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT) + * ~~~ + * + * **Purpose**:\n + * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion + * of the 32-bit chunks in registers and then double and saturate the Q31 results, add the results with + * the values of the corresponding 32-bit chunks from the destination register and write the saturated + * addition results back into the corresponding 32-bit chunks of the destination register. If saturation + * happens, an overflow flag OV will be set. + * + * **Description**:\n + * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top + * or bottom 16-bit Q15 content of the corresponding 32-bit portions in Rs2. The Q30 results are then + * doubled and saturated into Q31 values. The Q31 values are then added with the content of the + * corresponding 32-bit portions of Rd. If the addition results are beyond the Q31 number range (-2^31 <= + * Q31 <= 2^31-1), they are saturated to the range and the OV flag is set to 1. The results after saturation + * are written back to Rd. + * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be + * set. + * + * **Operations**:\n + * ~~~ + * // KDMABB16: (x,y,z)=(0,0,0),(2,2,1) + * // KDMABT16: (x,y,z)=(0,1,0),(2,3,1) + * // KDMATT16: (x,y,z)=(1,1,0),(3,3,1) + * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y]; + * If (0x8000 != aop[z] | 0x8000 != bop[z]) { + * Mresult[z] = aop[z] * bop[z]; + * resQ31[z] = Mresult[z] << 1; + * } else { + * resQ31[z] = 0x7FFFFFFF; + * OV = 1; + * } + * resadd[z] = Rd.W[z] + resQ31[z]; + * if (resadd[z] > (2^31)-1) { + * resadd[z] = (2^31)-1; + * OV = 1; + * } else if (resadd[z] < -2^31) { + * resadd[z] = -2^31; + * OV = 1; + * } + * Rd.W[z] = resadd[z]; + * ~~~ + * + * \param [in] t unsigned long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KDMABT16(unsigned long t, unsigned long a, unsigned long b) +{ + __ASM volatile("kdmabt16 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 4.9.2. KDMABT16 ===== */ + +/* ===== Inline Function Start for 4.9.3. KDMATT16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT + * \brief KDMATT16 (SIMD Signed Saturating Double Multiply Addition T16 x T16) + * \details + * **Type**: SIMD (RV64 only) + * + * **Syntax**:\n + * ~~~ + * KDMAxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT) + * ~~~ + * + * **Purpose**:\n + * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion + * of the 32-bit chunks in registers and then double and saturate the Q31 results, add the results with + * the values of the corresponding 32-bit chunks from the destination register and write the saturated + * addition results back into the corresponding 32-bit chunks of the destination register. If saturation + * happens, an overflow flag OV will be set. + * + * **Description**:\n + * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top + * or bottom 16-bit Q15 content of the corresponding 32-bit portions in Rs2. The Q30 results are then + * doubled and saturated into Q31 values. The Q31 values are then added with the content of the + * corresponding 32-bit portions of Rd. If the addition results are beyond the Q31 number range (-2^31 <= + * Q31 <= 2^31-1), they are saturated to the range and the OV flag is set to 1. The results after saturation + * are written back to Rd. + * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be + * set. + * + * **Operations**:\n + * ~~~ + * // KDMABB16: (x,y,z)=(0,0,0),(2,2,1) + * // KDMABT16: (x,y,z)=(0,1,0),(2,3,1) + * // KDMATT16: (x,y,z)=(1,1,0),(3,3,1) + * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y]; + * If (0x8000 != aop[z] | 0x8000 != bop[z]) { + * Mresult[z] = aop[z] * bop[z]; + * resQ31[z] = Mresult[z] << 1; + * } else { + * resQ31[z] = 0x7FFFFFFF; + * OV = 1; + * } + * resadd[z] = Rd.W[z] + resQ31[z]; + * if (resadd[z] > (2^31)-1) { + * resadd[z] = (2^31)-1; + * OV = 1; + * } else if (resadd[z] < -2^31) { + * resadd[z] = -2^31; + * OV = 1; + * } + * Rd.W[z] = resadd[z]; + * ~~~ + * + * \param [in] t unsigned long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KDMATT16(unsigned long t, unsigned long a, unsigned long b) +{ + __ASM volatile("kdmatt16 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 4.9.3. KDMATT16 ===== */ + +/* ===== Inline Function Start for 4.10.1. KHMBB16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT + * \brief KHMBB16 (SIMD Signed Saturating Half Multiply B16 x B16) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * KHMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT) + * ~~~ + * + * **Purpose**:\n + * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion + * of the 32-bit chunks in registers and then right-shift 15 bits to turn the Q30 results into Q15 + * numbers again and saturate the Q15 results into the destination register. If saturation happens, an + * overflow flag OV will be set. + * + * **Description**:\n + * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top + * or bottom 16-bit Q15 content of the 32-bit portion in Rs2. The Q30 results are then right-shifted 15- + * bits and saturated into Q15 values. The 32-bit Q15 values are then written into the 32-bit chunks in + * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated + * to 0x7FFF and the overflow flag OV will be set. + * + * **Operations**:\n + * ~~~ + * // KHMBB16: (x,y,z)=(0,0,0),(2,2,1) + * // KHMBT16: (x,y,z)=(0,1,0),(2,3,1) + * // KHMTT16: (x,y,z)=(1,1,0),(3,3,1) + * aop = Rs1.H[x]; bop = Rs2.H[y]; + * If (0x8000 != aop | 0x8000 != bop) { + * Mresult[31:0] = aop * bop; + * res[15:0] = Mresult[30:15]; + * } else { + * res[15:0] = 0x7FFF; + * OV = 1; + * } + * Rd.W[z] = SE32(res[15:0]); + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KHMBB16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("khmbb16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.10.1. KHMBB16 ===== */ + +/* ===== Inline Function Start for 4.10.2. KHMBT16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT + * \brief KHMBT16 (SIMD Signed Saturating Half Multiply B16 x T16) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * KHMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT) + * ~~~ + * + * **Purpose**:\n + * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion + * of the 32-bit chunks in registers and then right-shift 15 bits to turn the Q30 results into Q15 + * numbers again and saturate the Q15 results into the destination register. If saturation happens, an + * overflow flag OV will be set. + * + * **Description**:\n + * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top + * or bottom 16-bit Q15 content of the 32-bit portion in Rs2. The Q30 results are then right-shifted 15- + * bits and saturated into Q15 values. The 32-bit Q15 values are then written into the 32-bit chunks in + * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated + * to 0x7FFF and the overflow flag OV will be set. + * + * **Operations**:\n + * ~~~ + * // KHMBB16: (x,y,z)=(0,0,0),(2,2,1) + * // KHMBT16: (x,y,z)=(0,1,0),(2,3,1) + * // KHMTT16: (x,y,z)=(1,1,0),(3,3,1) + * aop = Rs1.H[x]; bop = Rs2.H[y]; + * If (0x8000 != aop | 0x8000 != bop) { + * Mresult[31:0] = aop * bop; + * res[15:0] = Mresult[30:15]; + * } else { + * res[15:0] = 0x7FFF; + * OV = 1; + * } + * Rd.W[z] = SE32(res[15:0]); + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KHMBT16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("khmbt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.10.2. KHMBT16 ===== */ + +/* ===== Inline Function Start for 4.10.3. KHMTT16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT + * \brief KHMTT16 (SIMD Signed Saturating Half Multiply T16 x T16) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * KHMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT) + * ~~~ + * + * **Purpose**:\n + * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion + * of the 32-bit chunks in registers and then right-shift 15 bits to turn the Q30 results into Q15 + * numbers again and saturate the Q15 results into the destination register. If saturation happens, an + * overflow flag OV will be set. + * + * **Description**:\n + * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top + * or bottom 16-bit Q15 content of the 32-bit portion in Rs2. The Q30 results are then right-shifted 15- + * bits and saturated into Q15 values. The 32-bit Q15 values are then written into the 32-bit chunks in + * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated + * to 0x7FFF and the overflow flag OV will be set. + * + * **Operations**:\n + * ~~~ + * // KHMBB16: (x,y,z)=(0,0,0),(2,2,1) + * // KHMBT16: (x,y,z)=(0,1,0),(2,3,1) + * // KHMTT16: (x,y,z)=(1,1,0),(3,3,1) + * aop = Rs1.H[x]; bop = Rs2.H[y]; + * If (0x8000 != aop | 0x8000 != bop) { + * Mresult[31:0] = aop * bop; + * res[15:0] = Mresult[30:15]; + * } else { + * res[15:0] = 0x7FFF; + * OV = 1; + * } + * Rd.W[z] = SE32(res[15:0]); + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KHMTT16(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("khmtt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.10.3. KHMTT16 ===== */ + +/* ===== Inline Function Start for 4.11.1. KMABB32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT_ADD + * \brief KMABB32 (Saturating Signed Multiply Bottom Words & Add) + * \details + * **Type**: DSP (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * KMABB32 Rd, Rs1, Rs2 + * KMABT32 Rd, Rs1, Rs2 + * KMATT32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the signed 32-bit element in a register with the 32-bit element in another register + * and add the result to the content of 64-bit data in the third register. The addition result may be + * saturated and is written to the third register. + * * KMABB32: rd + bottom*bottom + * * KMABT32: rd + bottom*top + * * KMATT32: rd + top*top + * + * **Description**:\n + * For the `KMABB32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit + * element in Rs2. + * For the `KMABT32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit + * element in Rs2. + * For the `KMATT32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit + * element in Rs2. + * The multiplication result is added to the content of 64-bit data in Rd. If the addition result is beyond + * the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The + * result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed + * integers. + * + * **Operations**:\n + * ~~~ + * res = Rd + (Rs1.W[0] * Rs2.W[0]); // KMABB32 + * res = Rd + (Rs1.W[0] * Rs2.W[1]); // KMABT32 + * res = Rd + (Rs1.W[1] * Rs2.W[1]); // KMATT32 + * if (res > (2^63)-1) { + * res = (2^63)-1; + * OV = 1; + * } else if (res < -2^63) { + * res = -2^63; + * OV = 1; + * } + * Rd = res; + * *Exceptions:* None + * ~~~ + * + * \param [in] t long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMABB32(long t, unsigned long a, unsigned long b) +{ + __ASM volatile("kmabb32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 4.11.1. KMABB32 ===== */ + +/* ===== Inline Function Start for 4.11.2. KMABT32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT_ADD + * \brief KMABT32 (Saturating Signed Multiply Bottom & Top Words & Add) + * \details + * **Type**: DSP (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * KMABB32 Rd, Rs1, Rs2 + * KMABT32 Rd, Rs1, Rs2 + * KMATT32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the signed 32-bit element in a register with the 32-bit element in another register + * and add the result to the content of 64-bit data in the third register. The addition result may be + * saturated and is written to the third register. + * * KMABB32: rd + bottom*bottom + * * KMABT32: rd + bottom*top + * * KMATT32: rd + top*top + * + * **Description**:\n + * For the `KMABB32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit + * element in Rs2. + * For the `KMABT32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit + * element in Rs2. + * For the `KMATT32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit + * element in Rs2. + * The multiplication result is added to the content of 64-bit data in Rd. If the addition result is beyond + * the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The + * result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed + * integers. + * + * **Operations**:\n + * ~~~ + * res = Rd + (Rs1.W[0] * Rs2.W[0]); // KMABB32 + * res = Rd + (Rs1.W[0] * Rs2.W[1]); // KMABT32 + * res = Rd + (Rs1.W[1] * Rs2.W[1]); // KMATT32 + * if (res > (2^63)-1) { + * res = (2^63)-1; + * OV = 1; + * } else if (res < -2^63) { + * res = -2^63; + * OV = 1; + * } + * Rd = res; + * *Exceptions:* None + * ~~~ + * + * \param [in] t long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMABT32(long t, unsigned long a, unsigned long b) +{ + __ASM volatile("kmabt32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 4.11.2. KMABT32 ===== */ + +/* ===== Inline Function Start for 4.11.3. KMATT32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT_ADD + * \brief KMATT32 (Saturating Signed Multiply Top Words & Add) + * \details + * **Type**: DSP (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * KMABB32 Rd, Rs1, Rs2 + * KMABT32 Rd, Rs1, Rs2 + * KMATT32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the signed 32-bit element in a register with the 32-bit element in another register + * and add the result to the content of 64-bit data in the third register. The addition result may be + * saturated and is written to the third register. + * * KMABB32: rd + bottom*bottom + * * KMABT32: rd + bottom*top + * * KMATT32: rd + top*top + * + * **Description**:\n + * For the `KMABB32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit + * element in Rs2. + * For the `KMABT32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit + * element in Rs2. + * For the `KMATT32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit + * element in Rs2. + * The multiplication result is added to the content of 64-bit data in Rd. If the addition result is beyond + * the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The + * result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed + * integers. + * + * **Operations**:\n + * ~~~ + * res = Rd + (Rs1.W[0] * Rs2.W[0]); // KMABB32 + * res = Rd + (Rs1.W[0] * Rs2.W[1]); // KMABT32 + * res = Rd + (Rs1.W[1] * Rs2.W[1]); // KMATT32 + * if (res > (2^63)-1) { + * res = (2^63)-1; + * OV = 1; + * } else if (res < -2^63) { + * res = -2^63; + * OV = 1; + * } + * Rd = res; + * *Exceptions:* None + * ~~~ + * + * \param [in] t long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMATT32(long t, unsigned long a, unsigned long b) +{ + __ASM volatile("kmatt32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 4.11.3. KMATT32 ===== */ + +/* ===== Inline Function Start for 4.12.1. KMADA32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC + * \brief KMADA32 (Saturating Signed Multiply Two Words and Two Adds) + * \details + * **Type**: DSP (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * KMADA32 Rd, Rs1, Rs2 + * KMAXDA32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do two signed 32-bit multiplications from 32-bit data in two registers; and then adds the + * two 64-bit results and 64-bit data in a third register together. The addition result may be saturated. + * * KMADA32: rd + top*top + bottom*bottom + * * KMAXDA32: rd + top*bottom + bottom*top + * + * **Description**:\n + * For the `KMADA32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32- + * bit element in Rs2 and then adds the result to the result of multiplying the top 32-bit element in Rs1 + * with the top 32-bit element in Rs2. It is actually an alias of the `KMAR64` instruction. + * For the `KMAXDA32` instruction, it multiplies the top 32-bit element in Rs1 with the bottom 32-bit + * element in Rs2 and then adds the result to the result of multiplying the bottom 32-bit element in Rs1 + * with the top 32-bit element in Rs2. + * The result is added to the content of 64-bit data in Rd. If the addition result is beyond the Q63 + * number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The 64-bit + * result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed integers. + * + * **Operations**:\n + * ~~~ + * res = Rd + (Rs1.W[1] * Rs2.w[1]) + (Rs1.W[0] * Rs2.W[0]); // KMADA32 + * res = Rd + (Rs1.W[1] * Rs2.W[0]) + (Rs1.W[0] * Rs2.W[1]); // KMAXDA32 + * if (res > (2^63)-1) { + * res = (2^63)-1; + * OV = 1; + * } else if (res < -2^63) { + * res = -2^63; + * OV = 1; + * } + * Rd = res; + * ~~~ + * + * \param [in] t long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMADA32(long t, unsigned long a, unsigned long b) +{ + __ASM volatile("kmada32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 4.12.1. KMADA32 ===== */ + +/* ===== Inline Function Start for 4.12.2. KMAXDA32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC + * \brief KMAXDA32 (Saturating Signed Crossed Multiply Two Words and Two Adds) + * \details + * **Type**: DSP (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * KMADA32 Rd, Rs1, Rs2 + * KMAXDA32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do two signed 32-bit multiplications from 32-bit data in two registers; and then adds the + * two 64-bit results and 64-bit data in a third register together. The addition result may be saturated. + * * KMADA32: rd + top*top + bottom*bottom + * * KMAXDA32: rd + top*bottom + bottom*top + * + * **Description**:\n + * For the `KMADA32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32- + * bit element in Rs2 and then adds the result to the result of multiplying the top 32-bit element in Rs1 + * with the top 32-bit element in Rs2. It is actually an alias of the `KMAR64` instruction. + * For the `KMAXDA32` instruction, it multiplies the top 32-bit element in Rs1 with the bottom 32-bit + * element in Rs2 and then adds the result to the result of multiplying the bottom 32-bit element in Rs1 + * with the top 32-bit element in Rs2. + * The result is added to the content of 64-bit data in Rd. If the addition result is beyond the Q63 + * number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The 64-bit + * result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed integers. + * + * **Operations**:\n + * ~~~ + * res = Rd + (Rs1.W[1] * Rs2.w[1]) + (Rs1.W[0] * Rs2.W[0]); // KMADA32 + * res = Rd + (Rs1.W[1] * Rs2.W[0]) + (Rs1.W[0] * Rs2.W[1]); // KMAXDA32 + * if (res > (2^63)-1) { + * res = (2^63)-1; + * OV = 1; + * } else if (res < -2^63) { + * res = -2^63; + * OV = 1; + * } + * Rd = res; + * ~~~ + * + * \param [in] t long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMAXDA32(long t, unsigned long a, unsigned long b) +{ + __ASM volatile("kmaxda32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 4.12.2. KMAXDA32 ===== */ + +/* ===== Inline Function Start for 4.13.1. KMDA32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC + * \brief KMDA32 (Signed Multiply Two Words and Add) + * \details + * **Type**: DSP (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * KMDA32 Rd, Rs1, Rs2 + * KMXDA32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do two signed 32-bit multiplications from the 32-bit element of two registers; and then + * adds the two 64-bit results together. The addition result may be saturated. + * * KMDA32: top*top + bottom*bottom + * * KMXDA32: top*bottom + bottom*top + * + * **Description**:\n + * For the `KMDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit + * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1 + * with the top 32-bit element of Rs2. + * For the `KMXDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit + * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1 + * with the bottom 32-bit element of Rs2. + * The addition result is checked for saturation. If saturation happens, the result is saturated to 2^63-1. + * The final result is written to Rd. The 32-bit contents are treated as signed integers. + * + * **Operations**:\n + * ~~~ + * if ((Rs1 != 0x8000000080000000) or (Rs2 != 0x8000000080000000)) { + * Rd = (Rs1.W[1] * Rs2.W[1]) + (Rs1.W[0] * Rs2.W[0]); // KMDA32 + * Rd = (Rs1.W[1] * Rs2.W[0]) + (Rs1.W[0] * Rs2.W[1]); // KMXDA32 + * } else { + * Rd = 0x7fffffffffffffff; + * OV = 1; + * } + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMDA32(unsigned long a, unsigned long b) +{ + register long result; + __ASM volatile("kmda32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.13.1. KMDA32 ===== */ + +/* ===== Inline Function Start for 4.13.2. KMXDA32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC + * \brief KMXDA32 (Signed Crossed Multiply Two Words and Add) + * \details + * **Type**: DSP (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * KMDA32 Rd, Rs1, Rs2 + * KMXDA32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do two signed 32-bit multiplications from the 32-bit element of two registers; and then + * adds the two 64-bit results together. The addition result may be saturated. + * * KMDA32: top*top + bottom*bottom + * * KMXDA32: top*bottom + bottom*top + * + * **Description**:\n + * For the `KMDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit + * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1 + * with the top 32-bit element of Rs2. + * For the `KMXDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit + * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1 + * with the bottom 32-bit element of Rs2. + * The addition result is checked for saturation. If saturation happens, the result is saturated to 2^63-1. + * The final result is written to Rd. The 32-bit contents are treated as signed integers. + * + * **Operations**:\n + * ~~~ + * if ((Rs1 != 0x8000000080000000) or (Rs2 != 0x8000000080000000)) { + * Rd = (Rs1.W[1] * Rs2.W[1]) + (Rs1.W[0] * Rs2.W[0]); // KMDA32 + * Rd = (Rs1.W[1] * Rs2.W[0]) + (Rs1.W[0] * Rs2.W[1]); // KMXDA32 + * } else { + * Rd = 0x7fffffffffffffff; + * OV = 1; + * } + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMXDA32(unsigned long a, unsigned long b) +{ + register long result; + __ASM volatile("kmxda32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.13.2. KMXDA32 ===== */ + +/* ===== Inline Function Start for 4.14.1. KMADS32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC + * \brief KMADS32 (Saturating Signed Multiply Two Words & Subtract & Add) + * \details + * **Type**: DSP (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * KMADS32 Rd, Rs1, Rs2 + * KMADRS32 Rd, Rs1, Rs2 + * KMAXDS32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do two signed 32-bit multiplications from 32-bit elements in two registers; and then + * perform a subtraction operation between the two 64-bit results. Then add the subtraction result to + * 64-bit data in a third register. The addition result may be saturated. + * * KMADS32: rd + (top*top - bottom*bottom) + * * KMADRS32: rd + (bottom*bottom - top*top) + * * KMAXDS32: rd + (top*bottom - bottom*top) + * + * **Description**:\n + * For the `KMADS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit + * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in + * Rs1 with the top 32-bit element in Rs2. + * For the `KMADRS32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit + * element in Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit + * element in Rs1 with the bottom 32-bit element in Rs2. + * For the `KMAXDS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit + * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in + * Rs1 with the bottom 32-bit element in Rs2. + * The subtraction result is then added to the content of 64-bit data in Rd. If the addition result is + * beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to + * 1. The 64-bit result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated + * as signed integers. + * + * **Operations**:\n + * ~~~ + * res = Rd + (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // KMADS32 + * res = Rd + (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // KMADRS32 + * res = Rd + (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // KMAXDS32 + * if (res > (2^63)-1) { + * res = (2^63)-1; + * OV = 1; + * } else if (res < -2^63) { + * res = -2^63; + * OV = 1; + * } + * Rd = res; + * ~~~ + * + * \param [in] t long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMADS32(long t, unsigned long a, unsigned long b) +{ + __ASM volatile("kmads32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 4.14.1. KMADS32 ===== */ + +/* ===== Inline Function Start for 4.14.2. KMADRS32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC + * \brief KMADRS32 (Saturating Signed Multiply Two Words & Reverse Subtract & Add) + * \details + * **Type**: DSP (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * KMADS32 Rd, Rs1, Rs2 + * KMADRS32 Rd, Rs1, Rs2 + * KMAXDS32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do two signed 32-bit multiplications from 32-bit elements in two registers; and then + * perform a subtraction operation between the two 64-bit results. Then add the subtraction result to + * 64-bit data in a third register. The addition result may be saturated. + * * KMADS32: rd + (top*top - bottom*bottom) + * * KMADRS32: rd + (bottom*bottom - top*top) + * * KMAXDS32: rd + (top*bottom - bottom*top) + * + * **Description**:\n + * For the `KMADS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit + * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in + * Rs1 with the top 32-bit element in Rs2. + * For the `KMADRS32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit + * element in Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit + * element in Rs1 with the bottom 32-bit element in Rs2. + * For the `KMAXDS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit + * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in + * Rs1 with the bottom 32-bit element in Rs2. + * The subtraction result is then added to the content of 64-bit data in Rd. If the addition result is + * beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to + * 1. The 64-bit result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated + * as signed integers. + * + * **Operations**:\n + * ~~~ + * res = Rd + (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // KMADS32 + * res = Rd + (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // KMADRS32 + * res = Rd + (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // KMAXDS32 + * if (res > (2^63)-1) { + * res = (2^63)-1; + * OV = 1; + * } else if (res < -2^63) { + * res = -2^63; + * OV = 1; + * } + * Rd = res; + * ~~~ + * + * \param [in] t long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMADRS32(long t, unsigned long a, unsigned long b) +{ + __ASM volatile("kmadrs32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 4.14.2. KMADRS32 ===== */ + +/* ===== Inline Function Start for 4.14.3. KMAXDS32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC + * \brief KMAXDS32 (Saturating Signed Crossed Multiply Two Words & Subtract & Add) + * \details + * **Type**: DSP (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * KMADS32 Rd, Rs1, Rs2 + * KMADRS32 Rd, Rs1, Rs2 + * KMAXDS32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do two signed 32-bit multiplications from 32-bit elements in two registers; and then + * perform a subtraction operation between the two 64-bit results. Then add the subtraction result to + * 64-bit data in a third register. The addition result may be saturated. + * * KMADS32: rd + (top*top - bottom*bottom) + * * KMADRS32: rd + (bottom*bottom - top*top) + * * KMAXDS32: rd + (top*bottom - bottom*top) + * + * **Description**:\n + * For the `KMADS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit + * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in + * Rs1 with the top 32-bit element in Rs2. + * For the `KMADRS32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit + * element in Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit + * element in Rs1 with the bottom 32-bit element in Rs2. + * For the `KMAXDS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit + * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in + * Rs1 with the bottom 32-bit element in Rs2. + * The subtraction result is then added to the content of 64-bit data in Rd. If the addition result is + * beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to + * 1. The 64-bit result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated + * as signed integers. + * + * **Operations**:\n + * ~~~ + * res = Rd + (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // KMADS32 + * res = Rd + (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // KMADRS32 + * res = Rd + (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // KMAXDS32 + * if (res > (2^63)-1) { + * res = (2^63)-1; + * OV = 1; + * } else if (res < -2^63) { + * res = -2^63; + * OV = 1; + * } + * Rd = res; + * ~~~ + * + * \param [in] t long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMAXDS32(long t, unsigned long a, unsigned long b) +{ + __ASM volatile("kmaxds32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 4.14.3. KMAXDS32 ===== */ + +/* ===== Inline Function Start for 4.15.1. KMSDA32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC + * \brief KMSDA32 (Saturating Signed Multiply Two Words & Add & Subtract) + * \details + * **Type**: DSP (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * KMSDA32 Rd, Rs1, Rs2 + * KMSXDA32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do two signed 32-bit multiplications from the 32-bit element of two registers; and then + * subtracts the two 64-bit results from a third register. The subtraction result may be saturated. + * * KMSDA: rd - top*top - bottom*bottom + * * KMSXDA: rd - top*bottom - bottom*top + * + * **Description**:\n + * For the `KMSDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit + * element of Rs2 and multiplies the top 32-bit element of Rs1 with the top 32-bit element of Rs2. + * For the `KMSXDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit + * element of Rs2 and multiplies the top 32-bit element of Rs1 with the bottom 32-bit element of Rs2. + * The two 64-bit multiplication results are then subtracted from the content of Rd. If the subtraction + * result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit + * is set to 1. The result after saturation is written to Rd. The 32-bit contents are treated as signed + * integers. + * + * **Operations**:\n + * ~~~ + * res = Rd - (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // KMSDA32 + * res = Rd - (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // KMSXDA32 + * if (res > (2^63)-1) { + * res = (2^63)-1; + * OV = 1; + * } else if (res < -2^63) { + * res = -2^63; + * OV = 1; + * } + * Rd = res; + * ~~~ + * + * \param [in] t long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMSDA32(long t, unsigned long a, unsigned long b) +{ + __ASM volatile("kmsda32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 4.15.1. KMSDA32 ===== */ + +/* ===== Inline Function Start for 4.15.2. KMSXDA32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC + * \brief KMSXDA32 (Saturating Signed Crossed Multiply Two Words & Add & Subtract) + * \details + * **Type**: DSP (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * KMSDA32 Rd, Rs1, Rs2 + * KMSXDA32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do two signed 32-bit multiplications from the 32-bit element of two registers; and then + * subtracts the two 64-bit results from a third register. The subtraction result may be saturated. + * * KMSDA: rd - top*top - bottom*bottom + * * KMSXDA: rd - top*bottom - bottom*top + * + * **Description**:\n + * For the `KMSDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit + * element of Rs2 and multiplies the top 32-bit element of Rs1 with the top 32-bit element of Rs2. + * For the `KMSXDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit + * element of Rs2 and multiplies the top 32-bit element of Rs1 with the bottom 32-bit element of Rs2. + * The two 64-bit multiplication results are then subtracted from the content of Rd. If the subtraction + * result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit + * is set to 1. The result after saturation is written to Rd. The 32-bit contents are treated as signed + * integers. + * + * **Operations**:\n + * ~~~ + * res = Rd - (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // KMSDA32 + * res = Rd - (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // KMSXDA32 + * if (res > (2^63)-1) { + * res = (2^63)-1; + * OV = 1; + * } else if (res < -2^63) { + * res = -2^63; + * OV = 1; + * } + * Rd = res; + * ~~~ + * + * \param [in] t long type of value stored in t + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_KMSXDA32(long t, unsigned long a, unsigned long b) +{ + __ASM volatile("kmsxda32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); + return t; +} +/* ===== Inline Function End for 4.15.2. KMSXDA32 ===== */ + +/* ===== Inline Function Start for 4.16. KSLL32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT + * \brief KSLL32 (SIMD 32-bit Saturating Shift Left Logical) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * KSLL32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 32-bit elements logical left shift operations with saturation simultaneously. The shift + * amount is a variable from a GPR. + * + * **Description**:\n + * The 32-bit data elements in Rs1 are left-shifted logically. The shifted out bits are filled + * with zero and the shift amount is specified by the low-order 5-bits of the value in the Rs2 register. + * Any shifted value greater than 2^31-1 is saturated to 2^31-1. Any shifted value smaller than -2^31 is + * saturated to -2^31. And the saturated results are written to Rd. If any saturation is performed, set OV + * bit to 1. + * + * **Operations**:\n + * ~~~ + * sa = Rs2[4:0]; + * if (sa != 0) { + * res[(31+sa):0] = Rs1.W[x] << sa; + * if (res > (2^31)-1) { + * res = 0x7fffffff; OV = 1; + * } else if (res < -2^31) { + * res = 0x80000000; OV = 1; + * } + * Rd.W[x] = res[31:0]; + * } else { + * Rd = Rs1; + * } + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KSLL32(unsigned long a, unsigned int b) +{ + register unsigned long result; + __ASM volatile("ksll32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.16. KSLL32 ===== */ + +/* ===== Inline Function Start for 4.17. KSLLI32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT + * \brief KSLLI32 (SIMD 32-bit Saturating Shift Left Logical Immediate) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * KSLLI32 Rd, Rs1, imm5u + * ~~~ + * + * **Purpose**:\n + * Do 32-bit elements logical left shift operations with saturation simultaneously. The shift + * amount is an immediate value. + * + * **Description**:\n + * The 32-bit data elements in Rs1 are left-shifted logically. The shifted out bits are filled + * with zero and the shift amount is specified by the imm5u constant. Any shifted value greater than + * 2^31-1 is saturated to 2^31-1. Any shifted value smaller than -2^31 is saturated to -2^31. And the saturated + * results are written to Rd. If any saturation is performed, set OV bit to 1. + * + * **Operations**:\n + * ~~~ + * sa = imm5u[4:0]; + * if (sa != 0) { + * res[(31+sa):0] = Rs1.W[x] << sa; + * if (res > (2^31)-1) { + * res = 0x7fffffff; OV = 1; + * } else if (res < -2^31) { + * res = 0x80000000; OV = 1; + * } + * Rd.W[x] = res[31:0]; + * } else { + * Rd = Rs1; + * } + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KSLLI32(unsigned long a, unsigned int b) +{ + register unsigned long result; + __ASM volatile("kslli32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.17. KSLLI32 ===== */ + +/* ===== Inline Function Start for 4.18.1. KSLRA32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT + * \brief KSLRA32 (SIMD 32-bit Shift Left Logical with Saturation or Shift Right Arithmetic) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * KSLRA32 Rd, Rs1, Rs2 + * KSLRA32.u Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 32-bit elements logical left (positive) or arithmetic right (negative) shift operation with + * Q31 saturation for the left shift. The `.u` form performs additional rounding up operations for the + * right shift. + * + * **Description**:\n + * The 32-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically + * based on the value of Rs2[5:0]. Rs2[5:0] is in the signed range of [-25, 25-1]. A positive Rs2[5:0] means + * logical left shift and a negative Rs2[5:0] means arithmetic right shift. The shift amount is the + * absolute value of Rs2[5:0]. However, the behavior of `Rs2[5:0]==-25 (0x20)` is defined to be + * equivalent to the behavior of `Rs2[5:0]==-(25-1) (0x21)`. + * The left-shifted results are saturated to the 32-bit signed integer range of [-2^31, 2^31-1]. For the `.u` + * form of the instruction, the right-shifted results are added a 1 to the most significant discarded bit + * position for rounding effect. After the shift, saturation, or rounding, the final results are written to + * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:6] will not affect + * this instruction. + * + * **Operations**:\n + * ~~~ + * if (Rs2[5:0] < 0) { + * sa = -Rs2[5:0]; + * sa = (sa == 32)? 31 : sa; + * if (`.u` form) { + * res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1; + * Rd.W[x] = res[31:0]; + * } else { + * Rd.W[x] = SE32(Rs1.W[x][31:sa]); + * } + * } else { + * sa = Rs2[4:0]; + * res[(31+sa):0] = Rs1.W[x] <<(logic) sa; + * if (res > (2^31)-1) { + * res[31:0] = 0x7fffffff; OV = 1; + * } else if (res < -2^31) { + * res[31:0] = 0x80000000; OV = 1; + * } + * Rd.W[x] = res[31:0]; + * } + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b int type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KSLRA32(unsigned long a, int b) +{ + register unsigned long result; + __ASM volatile("kslra32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.18.1. KSLRA32 ===== */ + +/* ===== Inline Function Start for 4.18.2. KSLRA32.u ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT + * \brief KSLRA32.u (SIMD 32-bit Shift Left Logical with Saturation or Rounding Shift Right Arithmetic) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * KSLRA32 Rd, Rs1, Rs2 + * KSLRA32.u Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 32-bit elements logical left (positive) or arithmetic right (negative) shift operation with + * Q31 saturation for the left shift. The `.u` form performs additional rounding up operations for the + * right shift. + * + * **Description**:\n + * The 32-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically + * based on the value of Rs2[5:0]. Rs2[5:0] is in the signed range of [-25, 25-1]. A positive Rs2[5:0] means + * logical left shift and a negative Rs2[5:0] means arithmetic right shift. The shift amount is the + * absolute value of Rs2[5:0]. However, the behavior of `Rs2[5:0]==-25 (0x20)` is defined to be + * equivalent to the behavior of `Rs2[5:0]==-(25-1) (0x21)`. + * The left-shifted results are saturated to the 32-bit signed integer range of [-2^31, 2^31-1]. For the `.u` + * form of the instruction, the right-shifted results are added a 1 to the most significant discarded bit + * position for rounding effect. After the shift, saturation, or rounding, the final results are written to + * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:6] will not affect + * this instruction. + * + * **Operations**:\n + * ~~~ + * if (Rs2[5:0] < 0) { + * sa = -Rs2[5:0]; + * sa = (sa == 32)? 31 : sa; + * if (`.u` form) { + * res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1; + * Rd.W[x] = res[31:0]; + * } else { + * Rd.W[x] = SE32(Rs1.W[x][31:sa]); + * } + * } else { + * sa = Rs2[4:0]; + * res[(31+sa):0] = Rs1.W[x] <<(logic) sa; + * if (res > (2^31)-1) { + * res[31:0] = 0x7fffffff; OV = 1; + * } else if (res < -2^31) { + * res[31:0] = 0x80000000; OV = 1; + * } + * Rd.W[x] = res[31:0]; + * } + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b int type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KSLRA32_U(unsigned long a, int b) +{ + register unsigned long result; + __ASM volatile("kslra32.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.18.2. KSLRA32.u ===== */ + +/* ===== Inline Function Start for 4.19. KSTAS32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB + * \brief KSTAS32 (SIMD 32-bit Signed Saturating Straight Addition & Subtraction) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * KSTAS32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 32-bit signed integer element saturating addition and 32-bit signed integer element + * saturating subtraction in a 64-bit chunk simultaneously. Operands are from corresponding 32-bit + * elements. + * + * **Description**:\n + * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit + * integer element in [63:32] of Rs2; at the same time, it subtracts the 32-bit integer element in [31:0] of + * Rs2 from the 32-bit integer element in [31:0] of Rs1. If any of the results are beyond the Q31 number + * range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated + * results are written to [63:32] of Rd for addition and [31:0] of Rd for subtraction. + * + * **Operations**:\n + * ~~~ + * res[1] = Rs1.W[1] + Rs2.W[1]; + * res[0] = Rs1.W[0] - Rs2.W[0]; + * if (res[x] > (2^31)-1) { + * res[x] = (2^31)-1; + * OV = 1; + * } else if (res < -2^31) { + * res[x] = -2^31; + * OV = 1; + * } + * Rd.W[1] = res[1]; + * Rd.W[0] = res[0]; + * for RV64, x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KSTAS32(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("kstas32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.19. KSTAS32 ===== */ + +/* ===== Inline Function Start for 4.20. KSTSA32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB + * \brief KSTSA32 (SIMD 32-bit Signed Saturating Straight Subtraction & Addition) + * \details + * **Type**: SIM (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * KSTSA32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 32-bit signed integer element saturating subtraction and 32-bit signed integer element + * saturating addition in a 64-bit chunk simultaneously. Operands are from corresponding 32-bit + * elements. + * *Description: * + * This instruction subtracts the 32-bit integer element in [63:32] of Rs2 from the 32-bit integer + * element in [63:32] of Rs1; at the same time, it adds the 32-bit integer element in [31:0] of Rs1 with + * the 32-bit integer element in [31:0] of Rs2. If any of the results are beyond the Q31 number range (- + * 231 <= Q31 <= 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated results are + * written to [63:32] of Rd for subtraction and [31:0] of Rd for addition. + * + * **Operations**:\n + * ~~~ + * res[1] = Rs1.W[1] - Rs2.W[1]; + * res[0] = Rs1.W[0] + Rs2.W[0]; + * if (res[x] > (2^31)-1) { + * res[x] = (2^31)-1; + * OV = 1; + * } else if (res < -2^31) { + * res[x] = -2^31; + * OV = 1; + * } + * Rd.W[1] = res[1]; + * Rd.W[0] = res[0]; + * for RV64, x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KSTSA32(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("kstsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.20. KSTSA32 ===== */ + +/* ===== Inline Function Start for 4.21. KSUB32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB + * \brief KSUB32 (SIMD 32-bit Signed Saturating Subtraction) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * KSUB32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 32-bit signed integer elements saturating subtractions simultaneously. + * + * **Description**:\n + * This instruction subtracts the 32-bit signed integer elements in Rs2 from the 32-bit + * signed integer elements in Rs1. If any of the results are beyond the Q31 number range (-2^31 <= Q31 <= + * 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to + * Rd. + * + * **Operations**:\n + * ~~~ + * res[x] = Rs1.W[x] - Rs2.W[x]; + * if (res[x] > (2^31)-1) { + * res[x] = (2^31)-1; + * OV = 1; + * } else if (res[x] < -2^31) { + * res[x] = -2^31; + * OV = 1; + * } + * Rd.W[x] = res[x]; + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_KSUB32(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("ksub32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.21. KSUB32 ===== */ + +/* ===== Inline Function Start for 4.22.1. PKBB32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK + * \brief PKBB32 (Pack Two 32-bit Data from Both Bottom Half) + * \details + * **Type**: DSP (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * PKBB32 Rd, Rs1, Rs2 + * PKBT32 Rd, Rs1, Rs2 + * PKTT32 Rd, Rs1, Rs2 + * PKTB32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Pack 32-bit data from 64-bit chunks in two registers. + * * PKBB32: bottom.bottom + * * PKBT32: bottom.top + * * PKTT32: top.top + * * PKTB32: top.bottom + * + * **Description**:\n + * (PKBB32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0]. + * (PKBT32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0]. + * (PKTT32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0]. + * (PKTB32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0]. + * + * **Operations**:\n + * ~~~ + * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*0*_]); // PKBB32 + * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*1*_]); // PKBT32 + * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*1*_]); // PKTT32 + * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*0*_]); // PKTB32 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_PKBB32(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("pkbb32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.22.1. PKBB32 ===== */ + +/* ===== Inline Function Start for 4.22.2. PKBT32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK + * \brief PKBT32 (Pack Two 32-bit Data from Bottom and Top Half) + * \details + * **Type**: DSP (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * PKBB32 Rd, Rs1, Rs2 + * PKBT32 Rd, Rs1, Rs2 + * PKTT32 Rd, Rs1, Rs2 + * PKTB32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Pack 32-bit data from 64-bit chunks in two registers. + * * PKBB32: bottom.bottom + * * PKBT32: bottom.top + * * PKTT32: top.top + * * PKTB32: top.bottom + * + * **Description**:\n + * (PKBB32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0]. + * (PKBT32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0]. + * (PKTT32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0]. + * (PKTB32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0]. + * + * **Operations**:\n + * ~~~ + * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*0*_]); // PKBB32 + * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*1*_]); // PKBT32 + * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*1*_]); // PKTT32 + * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*0*_]); // PKTB32 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_PKBT32(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("pkbt32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.22.2. PKBT32 ===== */ + +/* ===== Inline Function Start for 4.22.3. PKTT32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK + * \brief PKTT32 (Pack Two 32-bit Data from Both Top Half) + * \details + * **Type**: DSP (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * PKBB32 Rd, Rs1, Rs2 + * PKBT32 Rd, Rs1, Rs2 + * PKTT32 Rd, Rs1, Rs2 + * PKTB32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Pack 32-bit data from 64-bit chunks in two registers. + * * PKBB32: bottom.bottom + * * PKBT32: bottom.top + * * PKTT32: top.top + * * PKTB32: top.bottom + * + * **Description**:\n + * (PKBB32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0]. + * (PKBT32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0]. + * (PKTT32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0]. + * (PKTB32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0]. + * + * **Operations**:\n + * ~~~ + * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*0*_]); // PKBB32 + * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*1*_]); // PKBT32 + * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*1*_]); // PKTT32 + * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*0*_]); // PKTB32 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_PKTT32(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("pktt32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.22.3. PKTT32 ===== */ + +/* ===== Inline Function Start for 4.22.4. PKTB32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK + * \brief PKTB32 (Pack Two 32-bit Data from Top and Bottom Half) + * \details + * **Type**: DSP (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * PKBB32 Rd, Rs1, Rs2 + * PKBT32 Rd, Rs1, Rs2 + * PKTT32 Rd, Rs1, Rs2 + * PKTB32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Pack 32-bit data from 64-bit chunks in two registers. + * * PKBB32: bottom.bottom + * * PKBT32: bottom.top + * * PKTT32: top.top + * * PKTB32: top.bottom + * + * **Description**:\n + * (PKBB32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0]. + * (PKBT32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0]. + * (PKTT32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0]. + * (PKTB32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0]. + * + * **Operations**:\n + * ~~~ + * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*0*_]); // PKBB32 + * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*1*_]); // PKBT32 + * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*1*_]); // PKTT32 + * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*0*_]); // PKTB32 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_PKTB32(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("pktb32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.22.4. PKTB32 ===== */ + +/* ===== Inline Function Start for 4.23. RADD32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB + * \brief RADD32 (SIMD 32-bit Signed Halving Addition) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * RADD32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 32-bit signed integer element additions simultaneously. The results are halved to avoid + * overflow or saturation. + * + * **Description**:\n + * This instruction adds the 32-bit signed integer elements in Rs1 with the 32-bit signed + * integer elements in Rs2. The results are first arithmetically right-shifted by 1 bit and then written to + * Rd. + * + * **Examples**:\n + * ~~~ + * * Rs1 = 0x7FFFFFFF, Rs2 = 0x7FFFFFFF Rd = 0x7FFFFFFF + * * Rs1 = 0x80000000, Rs2 = 0x80000000 Rd = 0x80000000 + * * Rs1 = 0x40000000, Rs2 = 0x80000000 Rd = 0xE0000000 + * ~~~ + * + * **Operations**:\n + * ~~~ + * Rd.W[x] = (Rs1.W[x] + Rs2.W[x]) s>> 1; + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_RADD32(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("radd32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.23. RADD32 ===== */ + +/* ===== Inline Function Start for 4.24. RCRAS32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB + * \brief RCRAS32 (SIMD 32-bit Signed Halving Cross Addition & Subtraction) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * RCRAS32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 32-bit signed integer element addition and 32-bit signed integer element subtraction in + * a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements. The results are halved to + * avoid overflow or saturation. + * + * **Description**:\n + * This instruction adds the 32-bit signed integer element in [63:32] of Rs1 with the 32-bit + * signed integer element in [31:0] of Rs2, and subtracts the 32-bit signed integer element in [63:32] of + * Rs2 from the 32-bit signed integer element in [31:0] of Rs1. The element results are first + * arithmetically right-shifted by 1 bit and then written to [63:32] of Rd for addition and [31:0] of Rd + * for subtraction. + * + * **Examples**:\n + * ~~~ + * Please see `RADD32` and `RSUB32` instructions. + * ~~~ + * + * **Operations**:\n + * ~~~ + * Rd.W[1] = (Rs1.W[1] + Rs2.W[0]) s>> 1; + * Rd.W[0] = (Rs1.W[0] - Rs2.W[1]) s>> 1; + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_RCRAS32(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("rcras32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.24. RCRAS32 ===== */ + +/* ===== Inline Function Start for 4.25. RCRSA32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB + * \brief RCRSA32 (SIMD 32-bit Signed Halving Cross Subtraction & Addition) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * RCRSA32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 32-bit signed integer element subtraction and 32-bit signed integer element addition in + * a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements. The results are halved to + * avoid overflow or saturation. + * + * **Description**:\n + * This instruction subtracts the 32-bit signed integer element in [31:0] of Rs2 from the + * 32-bit signed integer element in [63:32] of Rs1, and adds the 32-bit signed element integer in [31:0] + * of Rs1 with the 32-bit signed integer element in [63:32] of Rs2. The two results are first + * arithmetically right-shifted by 1 bit and then written to [63:32] of Rd for subtraction and [31:0] of + * Rd for addition. + * + * **Examples**:\n + * ~~~ + * Please see `RADD32` and `RSUB32` instructions. + * ~~~ + * + * **Operations**:\n + * ~~~ + * Rd.W[1] = (Rs1.W[1] - Rs2.W[0]) s>> 1; + * Rd.W[0] = (Rs1.W[0] + Rs2.W[1]) s>> 1; + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_RCRSA32(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("rcrsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.25. RCRSA32 ===== */ + +/* ===== Inline Function Start for 4.26. RSTAS32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB + * \brief RSTAS32 (SIMD 32-bit Signed Halving Straight Addition & Subtraction) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * RSTAS32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 32-bit signed integer element addition and 32-bit signed integer element subtraction in + * a 64-bit chunk simultaneously. Operands are from corresponding 32-bit elements. The results are + * halved to avoid overflow or saturation. + * + * **Description**:\n + * This instruction adds the 32-bit signed integer element in [63:32] of Rs1 with the 32-bit + * signed integer element in [63:32] of Rs2, and subtracts the 32-bit signed integer element in [31:0] of + * Rs2 from the 32-bit signed integer element in [31:0] of Rs1. The element results are first + * arithmetically right-shifted by 1 bit and then written to [63:32] of Rd for addition and [31:0] of Rd + * for subtraction. + * + * **Examples**:\n + * ~~~ + * Please see `RADD32` and `RSUB32` instructions. + * ~~~ + * + * **Operations**:\n + * ~~~ + * Rd.W[1] = (Rs1.W[1] + Rs2.W[1]) s>> 1; + * Rd.W[0] = (Rs1.W[0] - Rs2.W[0]) s>> 1; + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_RSTAS32(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("rstas32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.26. RSTAS32 ===== */ + +/* ===== Inline Function Start for 4.27. RSTSA32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB + * \brief RSTSA32 (SIMD 32-bit Signed Halving Straight Subtraction & Addition) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * RSTSA32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 32-bit signed integer element subtraction and 32-bit signed integer element addition in + * a 64-bit chunk simultaneously. Operands are from corresponding 32-bit elements. The results are + * halved to avoid overflow or saturation. + * + * **Description**:\n + * This instruction subtracts the 32-bit signed integer element in [63:32] of Rs2 from the + * 32-bit signed integer element in [63:32] of Rs1, and adds the 32-bit signed element integer in [31:0] + * of Rs1 with the 32-bit signed integer element in [31:0] of Rs2. The two results are first arithmetically + * right-shifted by 1 bit and then written to [63:32] of Rd for subtraction and [31:0] of Rd for addition. + * + * **Examples**:\n + * ~~~ + * Please see `RADD32` and `RSUB32` instructions. + * ~~~ + * + * **Operations**:\n + * ~~~ + * Rd.W[1] = (Rs1.W[1] - Rs2.W[1]) s>> 1; + * Rd.W[0] = (Rs1.W[0] + Rs2.W[0]) s>> 1; + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_RSTSA32(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("rstsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.27. RSTSA32 ===== */ + +/* ===== Inline Function Start for 4.28. RSUB32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB + * \brief RSUB32 (SIMD 32-bit Signed Halving Subtraction) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * RSUB32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 32-bit signed integer element subtractions simultaneously. The results are halved to + * avoid overflow or saturation. + * + * **Description**:\n + * This instruction subtracts the 32-bit signed integer elements in Rs2 from the 32-bit + * signed integer elements in Rs1. The results are first arithmetically right-shifted by 1 bit and then + * written to Rd. + * + * **Examples**:\n + * ~~~ + * * Ra = 0x7FFFFFFF, Rb = 0x80000000 Rt = 0x7FFFFFFF + * * Ra = 0x80000000, Rb = 0x7FFFFFFF Rt = 0x80000000 + * * Ra = 0x80000000, Rb = 0x40000000 Rt = 0xA0000000 + * ~~~ + * + * **Operations**:\n + * ~~~ + * Rd.W[x] = (Rs1.W[x] - Rs2.W[x]) s>> 1; + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_RSUB32(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("rsub32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.28. RSUB32 ===== */ + +/* ===== Inline Function Start for 4.29. SLL32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT + * \brief SLL32 (SIMD 32-bit Shift Left Logical) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * SLL32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 32-bit elements logical left shift operations simultaneously. The shift amount is a + * variable from a GPR. + * + * **Description**:\n + * The 32-bit elements in Rs1 are left-shifted logically. And the results are written to Rd. + * The shifted out bits are filled with zero and the shift amount is specified by the low-order 5-bits of + * the value in the Rs2 register. + * + * **Operations**:\n + * ~~~ + * sa = Rs2[4:0]; + * Rd.W[x] = Rs1.W[x] << sa; + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SLL32(unsigned long a, unsigned int b) +{ + register unsigned long result; + __ASM volatile("sll32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.29. SLL32 ===== */ + +/* ===== Inline Function Start for 4.30. SLLI32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT + * \brief SLLI32 (SIMD 32-bit Shift Left Logical Immediate) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * SLLI32 Rd, Rs1, imm5u[4:0] + * ~~~ + * + * **Purpose**:\n + * Do 32-bit element logical left shift operations simultaneously. The shift amount is an + * immediate value. + * + * **Description**:\n + * The 32-bit elements in Rs1 are left-shifted logically. The shifted out bits are filled with + * zero and the shift amount is specified by the imm5u[4:0] constant. And the results are written to Rd. + * + * **Operations**:\n + * ~~~ + * sa = imm5u[4:0]; + * Rd.W[x] = Rs1.W[x] << sa; + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SLLI32(unsigned long a, unsigned int b) +{ + register unsigned long result; + __ASM volatile("slli32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.30. SLLI32 ===== */ + +/* ===== Inline Function Start for 4.31. SMAX32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC + * \brief SMAX32 (SIMD 32-bit Signed Maximum) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * SMAX32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 32-bit signed integer elements finding maximum operations simultaneously. + * + * **Description**:\n + * This instruction compares the 32-bit signed integer elements in Rs1 with the 32-bit + * signed integer elements in Rs2 and selects the numbers that is greater than the other one. The + * selected results are written to Rd. + * + * **Operations**:\n + * ~~~ + * Rd.W[x] = (Rs1.W[x] > Rs2.W[x])? Rs1.W[x] : Rs2.W[x]; + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SMAX32(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("smax32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.31. SMAX32 ===== */ + +/* ===== Inline Function Start for 4.32.1. SMBB32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT + * \brief SMBB32 (Signed Multiply Bottom Word & Bottom Word) + * \details + * **Type**: DSP (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * SMBB32 Rd, Rs1, Rs2 + * SMBT32 Rd, Rs1, Rs2 + * SMTT32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the signed 32-bit element of a register with the signed 32-bit element of another + * register and write the 64-bit result to a third register. + * * SMBB32: bottom*bottom + * * SMBT32: bottom*top + * * SMTT32: top*top + * + * **Description**:\n + * For the `SMBB32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit + * element of Rs2. It is actually an alias of `MULSR64` instruction. + * For the `SMBT32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit + * element of Rs2. + * For the `SMTT32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit element + * of Rs2. + * The 64-bit multiplication result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as + * signed integers. + * + * **Operations**:\n + * ~~~ + * res = Rs1.W[0] * Rs2.W[0]; // SMBB32 res = Rs1.W[0] * Rs2.w[1]; // SMBT32 res = Rs1.W[1] * Rs2.W[1]; + * // SMTT32 Rd = res; + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_SMBB32(unsigned long a, unsigned long b) +{ + register long result; + __ASM volatile("smbb32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.32.1. SMBB32 ===== */ + +/* ===== Inline Function Start for 4.32.2. SMBT32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT + * \brief SMBT32 (Signed Multiply Bottom Word & Top Word) + * \details + * **Type**: DSP (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * SMBB32 Rd, Rs1, Rs2 + * SMBT32 Rd, Rs1, Rs2 + * SMTT32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the signed 32-bit element of a register with the signed 32-bit element of another + * register and write the 64-bit result to a third register. + * * SMBB32: bottom*bottom + * * SMBT32: bottom*top + * * SMTT32: top*top + * + * **Description**:\n + * For the `SMBB32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit + * element of Rs2. It is actually an alias of `MULSR64` instruction. + * For the `SMBT32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit + * element of Rs2. + * For the `SMTT32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit element + * of Rs2. + * The 64-bit multiplication result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as + * signed integers. + * + * **Operations**:\n + * ~~~ + * res = Rs1.W[0] * Rs2.W[0]; // SMBB32 res = Rs1.W[0] * Rs2.w[1]; // SMBT32 res = Rs1.W[1] * Rs2.W[1]; + * // SMTT32 Rd = res; + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_SMBT32(unsigned long a, unsigned long b) +{ + register long result; + __ASM volatile("smbt32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.32.2. SMBT32 ===== */ + +/* ===== Inline Function Start for 4.32.3. SMTT32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT + * \brief SMTT32 (Signed Multiply Top Word & Top Word) + * \details + * **Type**: DSP (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * SMBB32 Rd, Rs1, Rs2 + * SMBT32 Rd, Rs1, Rs2 + * SMTT32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Multiply the signed 32-bit element of a register with the signed 32-bit element of another + * register and write the 64-bit result to a third register. + * * SMBB32: bottom*bottom + * * SMBT32: bottom*top + * * SMTT32: top*top + * + * **Description**:\n + * For the `SMBB32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit + * element of Rs2. It is actually an alias of `MULSR64` instruction. + * For the `SMBT32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit + * element of Rs2. + * For the `SMTT32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit element + * of Rs2. + * The 64-bit multiplication result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as + * signed integers. + * + * **Operations**:\n + * ~~~ + * res = Rs1.W[0] * Rs2.W[0]; // SMBB32 res = Rs1.W[0] * Rs2.w[1]; // SMBT32 res = Rs1.W[1] * Rs2.W[1]; + * // SMTT32 Rd = res; + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_SMTT32(unsigned long a, unsigned long b) +{ + register long result; + __ASM volatile("smtt32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.32.3. SMTT32 ===== */ + +/* ===== Inline Function Start for 4.33.1. SMDS32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC + * \brief SMDS32 (Signed Multiply Two Words and Subtract) + * \details + * **Type**: DSP (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * SMDS32 Rd, Rs1, Rs2 + * SMDRS32 Rd, Rs1, Rs2 + * SMXDS32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do two signed 32-bit multiplications from the l 32-bit element of two registers; and then + * perform a subtraction operation between the two 64-bit results. + * * SMDS32: top*top - bottom*bottom + * * SMDRS32: bottom*bottom - top*top + * * SMXDS32: top*bottom - bottom*top + * + * **Description**:\n + * For the `SMDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit + * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of + * Rs1 with the top 32-bit element of Rs2. + * For the `SMDRS32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit + * element of Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit + * element of Rs1 with the bottom 32-bit element of Rs2. + * For the `SMXDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit + * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of + * Rs1 with the bottom 32-bit element of Rs2. + * The subtraction result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed + * integers. + * + * **Operations**:\n + * ~~~ + * Rt = (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // SMDS32 + * Rt = (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // SMDRS32 + * Rt = (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // SMXDS32 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_SMDS32(unsigned long a, unsigned long b) +{ + register long result; + __ASM volatile("smds32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.33.1. SMDS32 ===== */ + +/* ===== Inline Function Start for 4.33.2. SMDRS32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC + * \brief SMDRS32 (Signed Multiply Two Words and Reverse Subtract) + * \details + * **Type**: DSP (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * SMDS32 Rd, Rs1, Rs2 + * SMDRS32 Rd, Rs1, Rs2 + * SMXDS32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do two signed 32-bit multiplications from the l 32-bit element of two registers; and then + * perform a subtraction operation between the two 64-bit results. + * * SMDS32: top*top - bottom*bottom + * * SMDRS32: bottom*bottom - top*top + * * SMXDS32: top*bottom - bottom*top + * + * **Description**:\n + * For the `SMDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit + * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of + * Rs1 with the top 32-bit element of Rs2. + * For the `SMDRS32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit + * element of Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit + * element of Rs1 with the bottom 32-bit element of Rs2. + * For the `SMXDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit + * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of + * Rs1 with the bottom 32-bit element of Rs2. + * The subtraction result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed + * integers. + * + * **Operations**:\n + * ~~~ + * Rt = (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // SMDS32 + * Rt = (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // SMDRS32 + * Rt = (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // SMXDS32 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_SMDRS32(unsigned long a, unsigned long b) +{ + register long result; + __ASM volatile("smdrs32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.33.2. SMDRS32 ===== */ + +/* ===== Inline Function Start for 4.33.3. SMXDS32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC + * \brief SMXDS32 (Signed Crossed Multiply Two Words and Subtract) + * \details + * **Type**: DSP (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * SMDS32 Rd, Rs1, Rs2 + * SMDRS32 Rd, Rs1, Rs2 + * SMXDS32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do two signed 32-bit multiplications from the l 32-bit element of two registers; and then + * perform a subtraction operation between the two 64-bit results. + * * SMDS32: top*top - bottom*bottom + * * SMDRS32: bottom*bottom - top*top + * * SMXDS32: top*bottom - bottom*top + * + * **Description**:\n + * For the `SMDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit + * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of + * Rs1 with the top 32-bit element of Rs2. + * For the `SMDRS32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit + * element of Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit + * element of Rs1 with the bottom 32-bit element of Rs2. + * For the `SMXDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit + * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of + * Rs1 with the bottom 32-bit element of Rs2. + * The subtraction result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed + * integers. + * + * **Operations**:\n + * ~~~ + * Rt = (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // SMDS32 + * Rt = (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // SMDRS32 + * Rt = (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // SMXDS32 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_SMXDS32(unsigned long a, unsigned long b) +{ + register long result; + __ASM volatile("smxds32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.33.3. SMXDS32 ===== */ + +/* ===== Inline Function Start for 4.34. SMIN32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC + * \brief SMIN32 (SIMD 32-bit Signed Minimum) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * SMIN32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 32-bit signed integer elements finding minimum operations simultaneously. + * + * **Description**:\n + * This instruction compares the 32-bit signed integer elements in Rs1 with the 32-bit + * signed integer elements in Rs2 and selects the numbers that is less than the other one. The selected + * results are written to Rd. + * + * **Operations**:\n + * ~~~ + * Rd.W[x] = (Rs1.W[x] < Rs2.W[x])? Rs1.W[x] : Rs2.W[x]; + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SMIN32(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("smin32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.34. SMIN32 ===== */ + +/* ===== Inline Function Start for 4.35.1. SRA32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT + * \brief SRA32 (SIMD 32-bit Shift Right Arithmetic) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * SRA32 Rd, Rs1, Rs2 + * SRA32.u Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 32-bit element arithmetic right shift operations simultaneously. The shift amount is a + * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted + * results. + * + * **Description**:\n + * The 32-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out + * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order + * 5-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is + * added to the most significant discarded bit of each 32-bit data element to calculate the final results. + * And the results are written to Rd. + * + * **Operations**:\n + * ~~~ + * sa = Rs2[4:0]; + * if (sa > 0) { + * if (`.u` form) { // SRA32.u + * res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1; + * Rd.W[x] = res[31:0]; + * else { // SRA32 + * Rd.W[x] = SE32(Rs1.W[x][31:sa]) + * } + * } else { + * Rd = Rs1; + * } + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SRA32(unsigned long a, unsigned int b) +{ + register unsigned long result; + __ASM volatile("sra32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.35.1. SRA32 ===== */ + +/* ===== Inline Function Start for 4.35.2. SRA32.u ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT + * \brief SRA32.u (SIMD 32-bit Rounding Shift Right Arithmetic) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * SRA32 Rd, Rs1, Rs2 + * SRA32.u Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 32-bit element arithmetic right shift operations simultaneously. The shift amount is a + * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted + * results. + * + * **Description**:\n + * The 32-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out + * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order + * 5-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is + * added to the most significant discarded bit of each 32-bit data element to calculate the final results. + * And the results are written to Rd. + * + * **Operations**:\n + * ~~~ + * sa = Rs2[4:0]; + * if (sa > 0) { + * if (`.u` form) { // SRA32.u + * res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1; + * Rd.W[x] = res[31:0]; + * else { // SRA32 + * Rd.W[x] = SE32(Rs1.W[x][31:sa]) + * } + * } else { + * Rd = Rs1; + * } + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SRA32_U(unsigned long a, unsigned int b) +{ + register unsigned long result; + __ASM volatile("sra32.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.35.2. SRA32.u ===== */ + +/* ===== Inline Function Start for 4.36.1. SRAI32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT + * \brief SRAI32 (SIMD 32-bit Shift Right Arithmetic Immediate) + * \details + * **Type**: DSP (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * SRAI32 Rd, Rs1, imm5u + * SRAI32.u Rd, Rs1, imm5u + * ~~~ + * + * **Purpose**:\n + * Do 32-bit elements arithmetic right shift operations simultaneously. The shift amount is + * an immediate value. The `.u` form performs additional rounding up operations on the shifted + * results. + * + * **Description**:\n + * The 32-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out + * bits are filled with the sign-bit of the 32-bit data elements. The shift amount is specified by the + * imm5u constant. For the rounding operation of the `.u` form, a value of 1 is added to the most + * significant discarded bit of each 32-bit data to calculate the final results. And the results are written + * to Rd. + * + * **Operations**:\n + * ~~~ + * sa = imm5u[4:0]; + * if (sa > 0) { + * if (`.u` form) { // SRAI32.u + * res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1; + * Rd.W[x] = res[31:0]; + * else { // SRAI32 + * Rd.W[x] = SE32(Rs1.W[x][31:sa]); + * } + * } else { + * Rd = Rs1; + * } + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SRAI32(unsigned long a, unsigned int b) +{ + register unsigned long result; + __ASM volatile("srai32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.36.1. SRAI32 ===== */ + +/* ===== Inline Function Start for 4.36.2. SRAI32.u ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT + * \brief SRAI32.u (SIMD 32-bit Rounding Shift Right Arithmetic Immediate) + * \details + * **Type**: DSP (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * SRAI32 Rd, Rs1, imm5u + * SRAI32.u Rd, Rs1, imm5u + * ~~~ + * + * **Purpose**:\n + * Do 32-bit elements arithmetic right shift operations simultaneously. The shift amount is + * an immediate value. The `.u` form performs additional rounding up operations on the shifted + * results. + * + * **Description**:\n + * The 32-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out + * bits are filled with the sign-bit of the 32-bit data elements. The shift amount is specified by the + * imm5u constant. For the rounding operation of the `.u` form, a value of 1 is added to the most + * significant discarded bit of each 32-bit data to calculate the final results. And the results are written + * to Rd. + * + * **Operations**:\n + * ~~~ + * sa = imm5u[4:0]; + * if (sa > 0) { + * if (`.u` form) { // SRAI32.u + * res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1; + * Rd.W[x] = res[31:0]; + * else { // SRAI32 + * Rd.W[x] = SE32(Rs1.W[x][31:sa]); + * } + * } else { + * Rd = Rs1; + * } + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SRAI32_U(unsigned long a, unsigned int b) +{ + register unsigned long result; + __ASM volatile("srai32.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.36.2. SRAI32.u ===== */ + +/* ===== Inline Function Start for 4.37. SRAIW.u ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_NON_SIMD_32B_SHIFT + * \brief SRAIW.u (Rounding Shift Right Arithmetic Immediate Word) + * \details + * **Type**: DSP (RV64 only) + * + * **Syntax**:\n + * ~~~ + * SRAIW.u Rd, Rs1, imm5u + * ~~~ + * + * **Purpose**:\n + * Perform a 32-bit arithmetic right shift operation with rounding. The shift amount is an + * immediate value. + * + * **Description**:\n + * This instruction right-shifts the lower 32-bit content of Rs1 arithmetically. The shifted + * out bits are filled with the sign-bit Rs1(31) and the shift amount is specified by the imm5u constant. + * For the rounding operation, a value of 1 is added to the most significant discarded bit of the data to + * calculate the final result. And the result is sign-extended and written to Rd. + * + * **Operations**:\n + * ~~~ + * sa = imm5u; + * if (sa != 0) { + * res[31:-1] = SE33(Rs1[31:(sa-1)]) + 1; + * Rd = SE32(res[31:0]); + * } else { + * Rd = SE32(Rs1.W[0]); + * } + * ~~~ + * + * \param [in] a int type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in long type + */ +__STATIC_FORCEINLINE long __RV_SRAIW_U(int a, unsigned int b) +{ + register long result; + __ASM volatile("sraiw.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.37. SRAIW.u ===== */ + +/* ===== Inline Function Start for 4.38.1. SRL32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT + * \brief SRL32 (SIMD 32-bit Shift Right Logical) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * SRL32 Rd, Rs1, Rs2 + * SRL32.u Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 32-bit element logical right shift operations simultaneously. The shift amount is a + * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted + * results. + * + * **Description**:\n + * The 32-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits + * are filled with zero. The shift amount is specified by the low-order 5-bits of the value in the Rs2 + * register. For the rounding operation of the `.u` form, a value of 1 is added to the most significant + * discarded bit of each 32-bit data element to calculate the final results. And the results are written to + * Rd. + * + * **Operations**:\n + * ~~~ + * sa = Rs2[4:0]; + * if (sa > 0) { + * if (`.u` form) { // SRA32.u + * res[31:-1] = ZE33(Rs1.W[x][31:sa-1]) + 1; + * Rd.W[x] = res[31:0]; + * else { // SRA32 + * Rd.W[x] = ZE32(Rs1.W[x][31:sa]) + * } + * } else { + * Rd = Rs1; + * } + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SRL32(unsigned long a, unsigned int b) +{ + register unsigned long result; + __ASM volatile("srl32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.38.1. SRL32 ===== */ + +/* ===== Inline Function Start for 4.38.2. SRL32.u ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT + * \brief SRL32.u (SIMD 32-bit Rounding Shift Right Logical) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * SRL32 Rd, Rs1, Rs2 + * SRL32.u Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 32-bit element logical right shift operations simultaneously. The shift amount is a + * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted + * results. + * + * **Description**:\n + * The 32-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits + * are filled with zero. The shift amount is specified by the low-order 5-bits of the value in the Rs2 + * register. For the rounding operation of the `.u` form, a value of 1 is added to the most significant + * discarded bit of each 32-bit data element to calculate the final results. And the results are written to + * Rd. + * + * **Operations**:\n + * ~~~ + * sa = Rs2[4:0]; + * if (sa > 0) { + * if (`.u` form) { // SRA32.u + * res[31:-1] = ZE33(Rs1.W[x][31:sa-1]) + 1; + * Rd.W[x] = res[31:0]; + * else { // SRA32 + * Rd.W[x] = ZE32(Rs1.W[x][31:sa]) + * } + * } else { + * Rd = Rs1; + * } + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SRL32_U(unsigned long a, unsigned int b) +{ + register unsigned long result; + __ASM volatile("srl32.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.38.2. SRL32.u ===== */ + +/* ===== Inline Function Start for 4.39.1. SRLI32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT + * \brief SRLI32 (SIMD 32-bit Shift Right Logical Immediate) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * SRLI32 Rd, Rs1, imm5u + * SRLI32.u Rd, Rs1, imm5u + * ~~~ + * + * **Purpose**:\n + * Do 32-bit elements logical right shift operations simultaneously. The shift amount is an + * immediate value. The `.u` form performs additional rounding up operations on the shifted results. + * + * **Description**:\n + * The 32-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits + * are filled with zero. The shift amount is specified by the imm5u constant. For the rounding + * operation of the `.u` form, a value of 1 is added to the most significant discarded bit of each 32-bit + * data to calculate the final results. And the results are written to Rd. + * + * **Operations**:\n + * ~~~ + * sa = imm5u[4:0]; + * if (sa > 0) { + * if (`.u` form) { // SRLI32.u + * res[31:-1] = ZE33(Rs1.W[x][31:sa-1]) + 1; + * Rd.W[x] = res[31:0]; + * else { // SRLI32 + * Rd.W[x] = ZE32(Rs1.W[x][31:sa]); + * } + * } else { + * Rd = Rs1; + * } + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SRLI32(unsigned long a, unsigned int b) +{ + register unsigned long result; + __ASM volatile("srli32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.39.1. SRLI32 ===== */ + +/* ===== Inline Function Start for 4.39.2. SRLI32.u ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT + * \brief SRLI32.u (SIMD 32-bit Rounding Shift Right Logical Immediate) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * SRLI32 Rd, Rs1, imm5u + * SRLI32.u Rd, Rs1, imm5u + * ~~~ + * + * **Purpose**:\n + * Do 32-bit elements logical right shift operations simultaneously. The shift amount is an + * immediate value. The `.u` form performs additional rounding up operations on the shifted results. + * + * **Description**:\n + * The 32-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits + * are filled with zero. The shift amount is specified by the imm5u constant. For the rounding + * operation of the `.u` form, a value of 1 is added to the most significant discarded bit of each 32-bit + * data to calculate the final results. And the results are written to Rd. + * + * **Operations**:\n + * ~~~ + * sa = imm5u[4:0]; + * if (sa > 0) { + * if (`.u` form) { // SRLI32.u + * res[31:-1] = ZE33(Rs1.W[x][31:sa-1]) + 1; + * Rd.W[x] = res[31:0]; + * else { // SRLI32 + * Rd.W[x] = ZE32(Rs1.W[x][31:sa]); + * } + * } else { + * Rd = Rs1; + * } + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned int type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SRLI32_U(unsigned long a, unsigned int b) +{ + register unsigned long result; + __ASM volatile("srli32.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.39.2. SRLI32.u ===== */ + +/* ===== Inline Function Start for 4.40. STAS32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB + * \brief STAS32 (SIMD 32-bit Straight Addition & Subtraction) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * STAS32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 32-bit integer element addition and 32-bit integer element subtraction in a 64-bit + * chunk simultaneously. Operands are from corresponding 32-bit elements. + * + * **Description**:\n + * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit + * integer element in [63:32] of Rs2, and writes the result to [63:32] of Rd; at the same time, it subtracts + * the 32-bit integer element in [31:0] of Rs2 from the 32-bit integer element in [31:0] of Rs1, and + * writes the result to [31:0] of Rd. + * + * **Note**:\n + * This instruction can be used for either signed or unsigned operations. + * + * **Operations**:\n + * ~~~ + * Rd.W[1] = Rs1.W[1] + Rs2.W[1]; + * Rd.W[0] = Rs1.W[0] - Rs2.W[0]; + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_STAS32(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("stas32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.40. STAS32 ===== */ + +/* ===== Inline Function Start for 4.41. STSA32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB + * \brief STSA32 (SIMD 32-bit Straight Subtraction & Addition) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * STSA32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 32-bit integer element subtraction and 32-bit integer element addition in a 64-bit + * chunk simultaneously. Operands are from corresponding 32-bit elements. + * *Description: * + * This instruction subtracts the 32-bit integer element in [63:32] of Rs2 from the 32-bit integer + * element in [63:32] of Rs1, and writes the result to [63:32] of Rd; at the same time, it adds the 32-bit + * integer element in [31:0] of Rs1 with the 32-bit integer element in [31:0] of Rs2, and writes the result + * to [31:0] of Rd + * + * **Note**:\n + * This instruction can be used for either signed or unsigned operations. + * + * **Operations**:\n + * ~~~ + * Rd.W[1] = Rs1.W[1] - Rs2.W[1]; + * Rd.W[0] = Rs1.W[0] + Rs2.W[0]; + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_STSA32(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("stsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.41. STSA32 ===== */ + +/* ===== Inline Function Start for 4.42. SUB32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB + * \brief SUB32 (SIMD 32-bit Subtraction) + * \details + * **Type**: DSP (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * SUB32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 32-bit integer element subtractions simultaneously. + * + * **Description**:\n + * This instruction subtracts the 32-bit integer elements in Rs2 from the 32-bit integer + * elements in Rs1, and then writes the results to Rd. + * + * **Note**:\n + * This instruction can be used for either signed or unsigned subtraction. + * + * **Operations**:\n + * ~~~ + * Rd.W[x] = Rs1.W[x] - Rs2.W[x]; + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_SUB32(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("sub32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.42. SUB32 ===== */ + +/* ===== Inline Function Start for 4.43. UKADD32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB + * \brief UKADD32 (SIMD 32-bit Unsigned Saturating Addition) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * UKADD32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 32-bit unsigned integer element saturating additions simultaneously. + * + * **Description**:\n + * This instruction adds the 32-bit unsigned integer elements in Rs1 with the 32-bit + * unsigned integer elements in Rs2. If any of the results are beyond the 32-bit unsigned number + * range (0 <= RES <= 2^32-1), they are saturated to the range and the OV bit is set to 1. The saturated + * results are written to Rd. + * + * **Operations**:\n + * ~~~ + * res[x] = Rs1.W[x] + Rs2.W[x]; + * if (res[x] > (2^32)-1) { + * res[x] = (2^32)-1; + * OV = 1; + * } + * Rd.W[x] = res[x]; + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_UKADD32(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("ukadd32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.43. UKADD32 ===== */ + +/* ===== Inline Function Start for 4.44. UKCRAS32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB + * \brief UKCRAS32 (SIMD 32-bit Unsigned Saturating Cross Addition & Subtraction) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * UKCRAS32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do one 32-bit unsigned integer element saturating addition and one 32-bit unsigned + * integer element saturating subtraction in a 64-bit chunk simultaneously. Operands are from crossed + * 32-bit elements. + * + * **Description**:\n + * This instruction adds the 32-bit unsigned integer element in [63:32] of Rs1 with the 32- + * bit unsigned integer element in [31:0] of Rs2; at the same time, it subtracts the 32-bit unsigned + * integer element in [63:32] of Rs2 from the 32-bit unsigned integer element in [31:0] Rs1. If any of the + * results are beyond the 32-bit unsigned number range (0 <= RES <= 2^32-1), they are saturated to the + * range and the OV bit is set to 1. The saturated results are written to [63:32] of Rd for addition and + * [31:0] of Rd for subtraction. + * + * **Operations**:\n + * ~~~ + * res1 = Rs1.W[1] + Rs2.W[0]; + * res2 = Rs1.W[0] - Rs2.W[1]; + * if (res1 > (2^32)-1) { + * res1 = (2^32)-1; + * OV = 1; + * } + * if (res2 < 0) { + * res2 = 0; + * OV = 1; + * } + * Rd.W[1] = res1; + * Rd.W[0] = res2; + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_UKCRAS32(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("ukcras32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.44. UKCRAS32 ===== */ + +/* ===== Inline Function Start for 4.45. UKCRSA32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB + * \brief UKCRSA32 (SIMD 32-bit Unsigned Saturating Cross Subtraction & Addition) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * UKCRSA32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do one 32-bit unsigned integer element saturating subtraction and one 32-bit unsigned + * integer element saturating addition in a 64-bit chunk simultaneously. Operands are from crossed + * 32-bit elements. + * + * **Description**:\n + * This instruction subtracts the 32-bit unsigned integer element in [31:0] of Rs2 from the + * 32-bit unsigned integer element in [63:32] of Rs1; at the same time, it adds the 32-bit unsigned + * integer element in [63:32] of Rs2 with the 32-bit unsigned integer element in [31:0] Rs1. If any of the + * results are beyond the 32-bit unsigned number range (0 <= RES <= 2^32-1), they are saturated to the + * range and the OV bit is set to 1. The saturated results are written to [63:32] of Rd for subtraction and + * [31:0] of Rd for addition. + * + * **Operations**:\n + * ~~~ + * res1 = Rs1.W[1] - Rs2.W[0]; + * res2 = Rs1.W[0] + Rs2.W[1]; + * if (res1 < 0) { + * res1 = 0; + * OV = 1; + * } else if (res2 > (2^32)-1) { + * res2 = (2^32)-1; + * OV = 1; + * } + * Rd.W[1] = res1; + * Rd.W[0] = res2; + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_UKCRSA32(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("ukcrsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.45. UKCRSA32 ===== */ + +/* ===== Inline Function Start for 4.46. UKSTAS32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB + * \brief UKSTAS32 (SIMD 32-bit Unsigned Saturating Straight Addition & Subtraction) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * UKSTAS32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do one 32-bit unsigned integer element saturating addition and one 32-bit unsigned + * integer element saturating subtraction in a 64-bit chunk simultaneously. Operands are from + * corresponding 32-bit elements. + * + * **Description**:\n + * This instruction adds the 32-bit unsigned integer element in [63:32] of Rs1 with the 32- + * bit unsigned integer element in [63:32] of Rs2; at the same time, it subtracts the 32-bit unsigned + * integer element in [31:0] of Rs2 from the 32-bit unsigned integer element in [31:0] Rs1. If any of the + * results are beyond the 32-bit unsigned number range (0 <= RES <= 2^32-1), they are saturated to the + * range and the OV bit is set to 1. The saturated results are written to [63:32] of Rd for addition and + * [31:0] of Rd for subtraction. + * + * **Operations**:\n + * ~~~ + * res1 = Rs1.W[1] + Rs2.W[1]; + * res2 = Rs1.W[0] - Rs2.W[0]; + * if (res1 > (2^32)-1) { + * res1 = (2^32)-1; + * OV = 1; + * } + * if (res2 < 0) { + * res2 = 0; + * OV = 1; + * } + * Rd.W[1] = res1; + * Rd.W[0] = res2; + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_UKSTAS32(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("ukstas32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.46. UKSTAS32 ===== */ + +/* ===== Inline Function Start for 4.47. UKSTSA32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB + * \brief UKSTSA32 (SIMD 32-bit Unsigned Saturating Straight Subtraction & Addition) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * UKSTSA32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do one 32-bit unsigned integer element saturating subtraction and one 32-bit unsigned + * integer element saturating addition in a 64-bit chunk simultaneously. Operands are from + * corresponding 32-bit elements. + * + * **Description**:\n + * This instruction subtracts the 32-bit unsigned integer element in [63:32] of Rs2 from + * the 32-bit unsigned integer element in [63:32] of Rs1; at the same time, it adds the 32-bit unsigned + * integer element in [31:0] of Rs2 with the 32-bit unsigned integer element in [31:0] Rs1. If any of the + * results are beyond the 32-bit unsigned number range (0 <= RES <= 2^32-1), they are saturated to the + * range and the OV bit is set to 1. The saturated results are written to [63:32] of Rd for subtraction and + * [31:0] of Rd for addition. + * + * **Operations**:\n + * ~~~ + * res1 = Rs1.W[1] - Rs2.W[1]; + * res2 = Rs1.W[0] + Rs2.W[0]; + * if (res1 < 0) { + * res1 = 0; + * OV = 1; + * } else if (res2 > (2^32)-1) { + * res2 = (2^32)-1; + * OV = 1; + * } + * Rd.W[1] = res1; + * Rd.W[0] = res2; + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_UKSTSA32(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("ukstsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.47. UKSTSA32 ===== */ + +/* ===== Inline Function Start for 4.48. UKSUB32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB + * \brief UKSUB32 (SIMD 32-bit Unsigned Saturating Subtraction) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * UKSUB32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 32-bit unsigned integer elements saturating subtractions simultaneously. + * + * **Description**:\n + * This instruction subtracts the 32-bit unsigned integer elements in Rs2 from the 32-bit + * unsigned integer elements in Rs1. If any of the results are beyond the 32-bit unsigned number + * range (0 <= RES <= 2^32-1), they are saturated to the range and the OV bit is set to 1. The saturated + * results are written to Rd. + * + * **Operations**:\n + * ~~~ + * res[x] = Rs1.W[x] - Rs2.W[x]; + * if (res[x] < 0) { + * res[x] = 0; + * OV = 1; + * } + * Rd.W[x] = res[x]; + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_UKSUB32(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("uksub32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.48. UKSUB32 ===== */ + +/* ===== Inline Function Start for 4.49. UMAX32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC + * \brief UMAX32 (SIMD 32-bit Unsigned Maximum) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * UMAX32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 32-bit unsigned integer elements finding maximum operations simultaneously. + * + * **Description**:\n + * This instruction compares the 32-bit unsigned integer elements in Rs1 with the 32-bit + * unsigned integer elements in Rs2 and selects the numbers that is greater than the other one. The + * selected results are written to Rd. + * + * **Operations**:\n + * ~~~ + * Rd.W[x] = (Rs1.W[x] u> Rs2.W[x])? Rs1.W[x] : Rs2.W[x]; + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_UMAX32(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("umax32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.49. UMAX32 ===== */ + +/* ===== Inline Function Start for 4.50. UMIN32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC + * \brief UMIN32 (SIMD 32-bit Unsigned Minimum) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * UMIN32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 32-bit unsigned integer elements finding minimum operations simultaneously. + * + * **Description**:\n + * This instruction compares the 32-bit unsigned integer elements in Rs1 with the 32-bit + * unsigned integer elements in Rs2 and selects the numbers that is less than the other one. The + * selected results are written to Rd. + * + * **Operations**:\n + * ~~~ + * Rd.W[x] = (Rs1.W[x] > 1; + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_URADD32(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("uradd32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.51. URADD32 ===== */ + +/* ===== Inline Function Start for 4.52. URCRAS32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB + * \brief URCRAS32 (SIMD 32-bit Unsigned Halving Cross Addition & Subtraction) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * URCRAS32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 32-bit unsigned integer element addition and 32-bit unsigned integer element + * subtraction in a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements. The + * results are halved to avoid overflow or saturation. + * + * **Description**:\n + * This instruction adds the 32-bit unsigned integer element in [63:32] of Rs1 with the 32- + * bit unsigned integer element in [31:0] of Rs2, and subtracts the 32-bit unsigned integer element in + * [63:32] of Rs2 from the 32-bit unsigned integer element in [31:0] of Rs1. The element results are first + * logically right-shifted by 1 bit and then written to [63:32] of Rd for addition and [31:0] of Rd for + * subtraction. + * + * **Examples**:\n + * ~~~ + * Please see `URADD32` and `URSUB32` instructions. + * ~~~ + * + * **Operations**:\n + * ~~~ + * Rd.W[1] = (Rs1.W[1] + Rs2.W[0]) u>> 1; + * Rd.W[0] = (Rs1.W[0] - Rs2.W[1]) u>> 1; + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_URCRAS32(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("urcras32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.52. URCRAS32 ===== */ + +/* ===== Inline Function Start for 4.53. URCRSA32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB + * \brief URCRSA32 (SIMD 32-bit Unsigned Halving Cross Subtraction & Addition) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * URCRSA32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 32-bit unsigned integer element subtraction and 32-bit unsigned integer element + * addition in a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements. The results + * are halved to avoid overflow or saturation. + * + * **Description**:\n + * This instruction subtracts the 32-bit unsigned integer element in [31:0] of Rs2 from the + * 32-bit unsigned integer element in [63:32] of Rs1, and adds the 32-bit unsigned element integer in + * [31:0] of Rs1 with the 32-bit unsigned integer element in [63:32] of Rs2. The two results are first + * logically right-shifted by 1 bit and then written to [63:32] of Rd for subtraction and [31:0] of Rd for + * addition. + * + * **Examples**:\n + * ~~~ + * Please see `URADD32` and `URSUB32` instructions. + * ~~~ + * + * **Operations**:\n + * ~~~ + * Rd.W[1] = (Rs1.W[1] - Rs2.W[0]) u>> 1; + * Rd.W[0] = (Rs1.W[0] + Rs2.W[1]) u>> 1; + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_URCRSA32(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("urcrsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.53. URCRSA32 ===== */ + +/* ===== Inline Function Start for 4.54. URSTAS32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB + * \brief URSTAS32 (SIMD 32-bit Unsigned Halving Straight Addition & Subtraction) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * URSTAS32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 32-bit unsigned integer element addition and 32-bit unsigned integer element + * subtraction in a 64-bit chunk simultaneously. Operands are from corresponding 32-bit elements. + * The results are halved to avoid overflow or saturation. + * + * **Description**:\n + * This instruction adds the 32-bit unsigned integer element in [63:32] of Rs1 with the 32- + * bit unsigned integer element in [63:32] of Rs2, and subtracts the 32-bit unsigned integer element in + * [31:0] of Rs2 from the 32-bit unsigned integer element in [31:0] of Rs1. The element results are first + * logically right-shifted by 1 bit and then written to [63:32] of Rd for addition and [31:0] of Rd for + * subtraction. + * + * **Examples**:\n + * ~~~ + * Please see `URADD32` and `URSUB32` instructions. + * ~~~ + * + * **Operations**:\n + * ~~~ + * Rd.W[1] = (Rs1.W[1] + Rs2.W[1]) u>> 1; + * Rd.W[0] = (Rs1.W[0] - Rs2.W[0]) u>> 1; + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_URSTAS32(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("urstas32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.54. URSTAS32 ===== */ + +/* ===== Inline Function Start for 4.55. URSTSA32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB + * \brief URSTSA32 (SIMD 32-bit Unsigned Halving Straight Subtraction & Addition) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * URSTSA32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 32-bit unsigned integer element subtraction and 32-bit unsigned integer element + * addition in a 64-bit chunk simultaneously. Operands are from corresponding 32-bit elements. The + * results are halved to avoid overflow or saturation. + * + * **Description**:\n + * This instruction subtracts the 32-bit unsigned integer element in [63:32] of Rs2 from + * the 32-bit unsigned integer element in [63:32] of Rs1, and adds the 32-bit unsigned element integer + * in [31:0] of Rs1 with the 32-bit unsigned integer element in [31:0] of Rs2. The two results are first + * logically right-shifted by 1 bit and then written to [63:32] of Rd for subtraction and [31:0] of Rd for + * addition. + * + * **Examples**:\n + * ~~~ + * Please see `URADD32` and `URSUB32` instructions. + * ~~~ + * + * **Operations**:\n + * ~~~ + * Rd.W[1] = (Rs1.W[1] - Rs2.W[1]) u>> 1; + * Rd.W[0] = (Rs1.W[0] + Rs2.W[0]) u>> 1; + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_URSTSA32(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("urstsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.55. URSTSA32 ===== */ + +/* ===== Inline Function Start for 4.56. URSUB32 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB + * \brief URSUB32 (SIMD 32-bit Unsigned Halving Subtraction) + * \details + * **Type**: SIMD (RV64 Only) + * + * **Syntax**:\n + * ~~~ + * URSUB32 Rd, Rs1, Rs2 + * ~~~ + * + * **Purpose**:\n + * Do 32-bit unsigned integer element subtractions simultaneously. The results are halved to + * avoid overflow or saturation. + * + * **Description**:\n + * This instruction subtracts the 32-bit unsigned integer elements in Rs2 from the 32-bit + * unsigned integer elements in Rs1. The results are first logically right-shifted by 1 bit and then + * written to Rd. + * + * **Examples**:\n + * ~~~ + * * Ra = 0x7FFFFFFF, Rb = 0x80000000, Rt = 0xFFFFFFFF + * * Ra = 0x80000000, Rb = 0x7FFFFFFF, Rt = 0x00000000 + * * Ra = 0x80000000, Rb = 0x40000000, Rt = 0x20000000 + * ~~~ + * + * **Operations**:\n + * ~~~ + * Rd.W[x] = (Rs1.W[x] - Rs2.W[x]) u>> 1; + * for RV64: x=1...0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \param [in] b unsigned long type of value stored in b + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_URSUB32(unsigned long a, unsigned long b) +{ + register unsigned long result; + __ASM volatile("ursub32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for 4.56. URSUB32 ===== */ + +#endif /* __RISCV_XLEN == 64 */ + + +#if (__RISCV_XLEN == 32) || defined(__ONLY_FOR_DOXYGEN_DOCUMENT_GENERATION__) +/* XXXXX Nuclei Extended DSP Instructions for RV32 XXXXX */ +/** + * \defgroup NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM Nuclei Customized DSP Instructions + * \ingroup NMSIS_Core_DSP_Intrinsic + * \brief (RV32 only)Nuclei Customized DSP Instructions + * \details This is Nuclei customized DSP instructions only for RV32 + */ +/* ===== Inline Function Start for A.1. DKHM8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM + * \brief DKHM8 (64-bit SIMD Signed Saturating Q7 Multiply) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * DKHM8 Rd, Rs1, Rs2 + * # Rd, Rs1, Rs2 are all even/odd pair of registers + * ~~~ + * + * **Purpose**:\n + * Do Q7xQ7 element multiplications simultaneously. The Q14 results are then reduced to Q7 + * numbers again. + * + * **Description**:\n + * For the `DKHM8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1 + * with the top 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7 + * content of 16-bit chunks in Rs1 with the bottom 8-bit Q7 content of 16-bit chunks in Rs2. + * + * The Q14 results are then right-shifted 7-bits and saturated into Q7 values. The Q7 results are then + * written into Rd. When both the two Q7 inputs of a multiplication are 0x80, saturation will happen. + * The result will be saturated to 0x7F and the overflow flag OV will be set. + * + * **Operations**:\n + * ~~~ + * op1t = Rs1.B[x+1]; op2t = Rs2.B[x+1]; // top + * op1b = Rs1.B[x]; op2b = Rs2.B[x]; // bottom + * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) { + * if (0x80 != aop | 0x80 != bop) { + * res = (aop s* bop) >> 7; + * } else { + * res= 0x7F; + * OV = 1; + * } + * } + * Rd.H[x/2] = concat(rest, resb); + * for RV32, x=0,2,4,6 + * ~~~ + * + * \param [in] a unsigned long long type of value stored in a + * \param [in] b unsigned long long type of value stored in b + * \return value stored in unsigned long long type + */ +__STATIC_FORCEINLINE unsigned long long __RV_DKHM8(unsigned long long a, unsigned long long b) +{ + unsigned long long result; + __ASM volatile("dkhm8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for A.1. DKHM8 ===== */ + +/* ===== Inline Function Start for A.2. DKHM16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM + * \brief DKHM16 (64-bit SIMD Signed Saturating Q15 Multiply) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * DKHM16 Rd, Rs1, Rs2 + * # Rd, Rs1, Rs2 are all even/odd pair of registers + * ~~~ + * + * **Purpose**:\n + * Do Q15xQ15 element multiplications simultaneously. The Q30 results are then reduced to + * Q15 numbers again. + * + * **Description**:\n + * For the `DKHM16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in + * Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom + * 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content of 32-bit chunks in + * Rs2. + * + * The Q30 results are then right-shifted 15-bits and saturated into Q15 values. The Q15 results are + * then written into Rd. When both the two Q15 inputs of a multiplication are 0x8000, saturation will + * happen. The result will be saturated to 0x7FFF and the overflow flag OV will be set. + * + * **Operations**:\n + * ~~~ + * op1t = Rs1.H[x+1]; op2t = Rs2.H[x+1]; // top + * op1b = Rs1.H[x]; op2b = Rs2.H[x]; // bottom + * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) { + * if (0x8000 != aop | 0x8000 != bop) { + * res = (aop s* bop) >> 15; + * } else { + * res= 0x7FFF; + * OV = 1; + * } + * } + * Rd.W[x/2] = concat(rest, resb); + * for RV32: x=0, 2 + * ~~~ + * + * \param [in] a unsigned long long type of value stored in a + * \param [in] b unsigned long long type of value stored in b + * \return value stored in unsigned long long type + */ +__STATIC_FORCEINLINE unsigned long long __RV_DKHM16(unsigned long long a, unsigned long long b) +{ + unsigned long long result; + __ASM volatile("dkhm16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for A.2. DKHM16 ===== */ + +/* ===== Inline Function Start for A.3. DKABS8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM + * \brief DKABS8 (64-bit SIMD 8-bit Saturating Absolute) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * DKABS8 Rd, Rs1 + * # Rd, Rs1 are all even/odd pair of registers + * ~~~ + * + * **Purpose**:\n + * Get the absolute value of 8-bit signed integer elements simultaneously. + * + * **Description**:\n + * This instruction calculates the absolute value of 8-bit signed integer elements stored + * in Rs1 and writes the element results to Rd. If the input number is 0x80, this instruction generates + * 0x7f as the output and sets the OV bit to 1. + * + * **Operations**:\n + * ~~~ + * src = Rs1.B[x]; + * if (src == 0x80) { + * src = 0x7f; + * OV = 1; + * } else if (src[7] == 1) + * src = -src; + * } + * Rd.B[x] = src; + * for RV32: x=7...0, + * ~~~ + * + * \param [in] a unsigned long long type of value stored in a + * \return value stored in unsigned long long type + */ +__STATIC_FORCEINLINE unsigned long long __RV_DKABS8(unsigned long long a) +{ + unsigned long long result; + __ASM volatile("dkabs8 %0, %1" : "=r"(result) : "r"(a)); + return result; +} +/* ===== Inline Function End for A.3. DKABS8 ===== */ + +/* ===== Inline Function Start for A.4. DKABS16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM + * \brief DKABS16 (64-bit SIMD 16-bit Saturating Absolute) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * DKABS16 Rd, Rs1 + * # Rd, Rs1 are all even/odd pair of registers + * ~~~ + * + * **Purpose**:\n + * Get the absolute value of 16-bit signed integer elements simultaneously. + * + * **Description**:\n + * This instruction calculates the absolute value of 16-bit signed integer elements stored + * in Rs1 and writes the element results to Rd. If the input number is 0x8000, this instruction + * generates 0x7fff as the output and sets the OV bit to 1. + * + * **Operations**:\n + * ~~~ + * src = Rs1.H[x]; + * if (src == 0x8000) { + * src = 0x7fff; + * OV = 1; + * } else if (src[15] == 1) + * src = -src; + * } + * Rd.H[x] = src; + * for RV32: x=3...0, + * ~~~ + * + * \param [in] a unsigned long long type of value stored in a + * \return value stored in unsigned long long type + */ +__STATIC_FORCEINLINE unsigned long long __RV_DKABS16(unsigned long long a) +{ + unsigned long long result; + __ASM volatile("dkabs16 %0, %1" : "=r"(result) : "r"(a)); + return result; +} +/* ===== Inline Function End for A.4. DKABS16 ===== */ + +/* ===== Inline Function Start for A.5. DKSLRA8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM + * \brief DKSLRA8 (64-bit SIMD 8-bit Shift Left Logical with Saturation or Shift Right Arithmetic) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * DKSLRA8 Rd, Rs1, Rs2 + * # Rd, Rs1 are all even/odd pair of registers + * ~~~ + * + * **Purpose**:\n + * Do 8-bit elements logical left (positive) or arithmetic right (negative) shift operation with + * Q7 saturation for the left shift. + * + * **Description**:\n + * The 8-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically + * based on the value of Rs2[3:0]. Rs2[3:0] is in the signed range of [-2^3, 2^3-1]. A positive Rs2[3:0] means + * logical left shift and a negative Rs2[3:0] means arithmetic right shift. The shift amount is the + * absolute value of Rs2[3:0]. However, the behavior of `Rs2[3:0]==-2^3 (0x8)` is defined to be + * equivalent to the behavior of `Rs2[3:0]==-(2^3-1) (0x9)`. + * The left-shifted results are saturated to the 8-bit signed integer range of [-2^7, 2^7-1]. + * If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:4] will not affect + * this instruction. + * + * **Operations**:\n + * ~~~ + * if (Rs2[3:0] < 0) { + * sa = -Rs2[3:0]; + * sa = (sa == 8)? 7 : sa; + * Rd.B[x] = SE8(Rs1.B[x][7:sa]); + * } else { + * sa = Rs2[2:0]; + * res[(7+sa):0] = Rs1.B[x] <<(logic) sa; + * if (res > (2^7)-1) { + * res[7:0] = 0x7f; OV = 1; + * } else if (res < -2^7) { + * res[7:0] = 0x80; OV = 1; + * } + * Rd.B[x] = res[7:0]; + * } + * for RV32: x=7...0, + * ~~~ + * + * \param [in] a unsigned long long type of value stored in a + * \param [in] b int type of value stored in b + * \return value stored in unsigned long long type + */ +__STATIC_FORCEINLINE unsigned long long __RV_DKSLRA8(unsigned long long a, int b) +{ + unsigned long long result; + __ASM volatile("dkslra8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for A.5. DKSLRA8 ===== */ + +/* ===== Inline Function Start for A.6. DKSLRA16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM + * \brief DKSLRA16 (64-bit SIMD 16-bit Shift Left Logical with Saturation or Shift Right Arithmetic) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * DKSLRA16 Rd, Rs1, Rs2 + * # Rd, Rs1 are all even/odd pair of registers + * ~~~ + * + * **Purpose**:\n + * Do 16-bit elements logical left (positive) or arithmetic right (negative) shift operation with + * Q15 saturation for the left shift. + * + * **Description**:\n + * The 16-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically + * based on the value of Rs2[4:0]. Rs2[4:0] is in the signed range of [-2^4, 2^4-1]. A positive Rs2[4:0] means + * logical left shift and a negative Rs2[4:0] means arithmetic right shift. The shift amount is the + * absolute value of Rs2[4:0]. However, the behavior of `Rs2[4:0]==-2^4 (0x10)` is defined to be + * equivalent to the behavior of `Rs2[4:0]==-(2^4-1) (0x11)`. + * The left-shifted results are saturated to the 16-bit signed integer range of [-2^15, 2^15-1]. + * After the shift, saturation, or rounding, the final results are written to + * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:5] will not affect + * this instruction. + * + * **Operations**:\n + * ~~~ + * if (Rs2[4:0] < 0) { + * sa = -Rs2[4:0]; + * sa = (sa == 16)? 15 : sa; + * Rd.H[x] = SE16(Rs1.H[x][15:sa]); + * } else { + * sa = Rs2[3:0]; + * res[(15+sa):0] = Rs1.H[x] <<(logic) sa; + * if (res > (2^15)-1) { + * res[15:0] = 0x7fff; OV = 1; + * } else if (res < -2^15) { + * res[15:0] = 0x8000; OV = 1; + * } + * d.H[x] = res[15:0]; + * } + * for RV32: x=3...0, + * ~~~ + * + * \param [in] a unsigned long long type of value stored in a + * \param [in] b int type of value stored in b + * \return value stored in unsigned long long type + */ +__STATIC_FORCEINLINE unsigned long long __RV_DKSLRA16(unsigned long long a, int b) +{ + unsigned long long result; + __ASM volatile("dkslra16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for A.6. DKSLRA16 ===== */ + +/* ===== Inline Function Start for A.7. DKADD8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM + * \brief DKADD8 (64-bit SIMD 8-bit Signed Saturating Addition) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * DKADD8 Rd, Rs1, Rs2 + * # Rd, Rs1, Rs2 are all even/odd pair of registers + * ~~~ + * + * **Purpose**:\n + * Do 8-bit signed integer element saturating additions simultaneously. + * + * **Description**:\n + * This instruction adds the 8-bit signed integer elements in Rs1 with the 8-bit signed + * integer elements in Rs2. If any of the results are beyond the Q7 number range (-2^7 <= Q7 <= 2^7-1), they + * are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd. + * + * **Operations**:\n + * ~~~ + * res[x] = Rs1.B[x] + Rs2.B[x]; + * if (res[x] > 127) { + * res[x] = 127; + * OV = 1; + * } else if (res[x] < -128) { + * res[x] = -128; + * OV = 1; + * } + * Rd.B[x] = res[x]; + * for RV32: x=7...0, + * ~~~ + * + * \param [in] a unsigned long long type of value stored in a + * \param [in] b unsigned long long type of value stored in b + * \return value stored in unsigned long long type + */ +__STATIC_FORCEINLINE unsigned long long __RV_DKADD8(unsigned long long a, unsigned long long b) +{ + unsigned long long result; + __ASM volatile("dkadd8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for A.7. DKADD8 ===== */ + +/* ===== Inline Function Start for A.8. DKADD16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM + * \brief DKADD16 (64-bit SIMD 16-bit Signed Saturating Addition) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * DKADD16 Rd, Rs1, Rs2 + * # Rd, Rs1, Rs2 are all even/odd pair of registers + * ~~~ + * + * **Purpose**:\n + * Do 16-bit signed integer element saturating additions simultaneously. + * + * **Description**:\n + * This instruction adds the 16-bit signed integer elements in Rs1 with the 16-bit signed + * integer elements in Rs2. If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1), + * they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd. + * + * **Operations**:\n + * ~~~ + * res[x] = Rs1.H[x] + Rs2.H[x]; + * if (res[x] > 32767) { + * res[x] = 32767; + * OV = 1; + * } else if (res[x] < -32768) { + * res[x] = -32768; + * OV = 1; + * } + * Rd.H[x] = res[x]; + * for RV32: x=3...0, + * ~~~ + * + * \param [in] a unsigned long long type of value stored in a + * \param [in] b unsigned long long type of value stored in b + * \return value stored in unsigned long long type + */ +__STATIC_FORCEINLINE unsigned long long __RV_DKADD16(unsigned long long a, unsigned long long b) +{ + unsigned long long result; + __ASM volatile("dkadd16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for A.8. DKADD16 ===== */ + +/* ===== Inline Function Start for A.10. DKSUB8 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM + * \brief DKSUB8 (64-bit SIMD 8-bit Signed Saturating Subtraction) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * DKSUB8 Rd, Rs1, Rs2 + * # Rd, Rs1, Rs2 are all even/odd pair of registers + * ~~~ + * + * **Purpose**:\n + * Do 8-bit signed elements saturating subtractions simultaneously. + * + * **Description**:\n + * This instruction subtracts the 8-bit signed integer elements in Rs2 from the 8-bit + * signed integer elements in Rs1. If any of the results are beyond the Q7 number range (-2^7 <= Q7 <= 2^7-1), + * they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd. + * + * **Operations**:\n + * ~~~ + * res[x] = Rs1.B[x] - Rs2.B[x]; + * if (res[x] > (2^7)-1) { + * res[x] = (2^7)-1; + * OV = 1; + * } else if (res[x] < -2^7) { + * res[x] = -2^7; + * OV = 1; + * } + * Rd.B[x] = res[x]; + * for RV32: x=7...0, + * ~~~ + * + * \param [in] a unsigned long long type of value stored in a + * \param [in] b unsigned long long type of value stored in b + * \return value stored in unsigned long long type + */ +__STATIC_FORCEINLINE unsigned long long __RV_DKSUB8(unsigned long long a, unsigned long long b) +{ + unsigned long long result; + __ASM volatile("dksub8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for A.9. DKSUB8 ===== */ + +/* ===== Inline Function Start for A.10. DKSUB16 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM + * \brief DKSUB16 (64-bit SIMD 16-bit Signed Saturating Subtraction) + * \details + * **Type**: SIMD + * + * **Syntax**:\n + * ~~~ + * DKSUB16 Rd, Rs1, Rs2 + * # Rd, Rs1, Rs2 are all even/odd pair of registers + * ~~~ + * + * **Purpose**:\n + * Do 16-bit signed integer elements saturating subtractions simultaneously. + * + * **Description**:\n + * This instruction subtracts the 16-bit signed integer elements in Rs2 from the 16-bit + * signed integer elements in Rs1. If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= + * 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to + * Rd. + * + * **Operations**:\n + * ~~~ + * res[x] = Rs1.H[x] - Rs2.H[x]; + * if (res[x] > (2^15)-1) { + * res[x] = (2^15)-1; + * OV = 1; + * } else if (res[x] < -2^15) { + * res[x] = -2^15; + * OV = 1; + * } + * Rd.H[x] = res[x]; + * for RV32: x=3...0, + * ~~~ + * + * \param [in] a unsigned long long type of value stored in a + * \param [in] b unsigned long long type of value stored in b + * \return value stored in unsigned long long type + */ +__STATIC_FORCEINLINE unsigned long long __RV_DKSUB16(unsigned long long a, unsigned long long b) +{ + unsigned long long result; + __ASM volatile("dksub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); + return result; +} +/* ===== Inline Function End for A.10. DKSUB16 ===== */ + +/* ===== Inline Function Start for A.11.1. EXPD80 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM + * \brief EXPD80 (Expand and Copy Byte 0 to 32bit) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * EXPD80 Rd, Rs1 + * ~~~ + * + * **Purpose**:\n + * Copy 8-bit data from 32-bit chunks into 4 bytes in a register. + * + * **Description**:\n + * Moves Rs1.B[0][7:0] to Rd.[0][7:0], Rd.[1][7:0], Rd.[2][7:0], Rd.[3][7:0] + * + * **Operations**:\n + * ~~~ + * Rd.W[x][31:0] = CONCAT(Rs1.B[0][7:0], Rs1.B[0][7:0], Rs1.B[0][7:0], Rs1.B[0][7:0]); + * for RV32: x=0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_EXPD80(unsigned long a) +{ + unsigned long result; + __ASM volatile("expd80 %0, %1" : "=r"(result) : "r"(a)); + return result; +} +/* ===== Inline Function End for A11.1. EXPD80 ===== */ + +/* ===== Inline Function Start for A.11.2. EXPD81 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM + * \brief EXPD81 (Expand and Copy Byte 1 to 32bit) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * EXPD81 Rd, Rs1 + * ~~~ + * + * **Purpose**:\n + * Copy 8-bit data from 32-bit chunks into 4 bytes in a register. + * + * **Description**:\n + * Moves Rs1.B[1][7:0] to Rd.[0][7:0], Rd.[1][7:0], Rd.[2][7:0], Rd.[3][7:0] + * + * **Operations**:\n + * ~~~ + * Rd.W[x][31:0] = CONCAT(Rs1.B[1][7:0], Rs1.B[1][7:0], Rs1.B[1][7:0], Rs1.B[1][7:0]); + * for RV32: x=0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_EXPD81(unsigned long a) +{ + unsigned long result; + __ASM volatile("expd81 %0, %1" : "=r"(result) : "r"(a)); + return result; +} +/* ===== Inline Function End for A11.2. EXPD81 ===== */ + +/* ===== Inline Function Start for A.11.3. EXPD82 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM + * \brief EXPD82 (Expand and Copy Byte 2 to 32bit) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * EXPD82 Rd, Rs1 + * ~~~ + * + * **Purpose**:\n + * Copy 8-bit data from 32-bit chunks into 4 bytes in a register. + * + * **Description**:\n + * Moves Rs1.B[2][7:0] to Rd.[0][7:0], Rd.[1][7:0], Rd.[2][7:0], Rd.[3][7:0] + * + * **Operations**:\n + * ~~~ + * Rd.W[x][31:0] = CONCAT(Rs1.B[2][7:0], Rs1.B[2][7:0], Rs1.B[2][7:0], Rs1.B[2][7:0]); + * for RV32: x=0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_EXPD82(unsigned long a) +{ + unsigned long result; + __ASM volatile("expd82 %0, %1" : "=r"(result) : "r"(a)); + return result; +} +/* ===== Inline Function End for A11.3. EXPD82 ===== */ + +/* ===== Inline Function Start for A.11.4. EXPD83 ===== */ +/** + * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM + * \brief EXPD83 (Expand and Copy Byte 3 to 32bit) + * \details + * **Type**: DSP + * + * **Syntax**:\n + * ~~~ + * EXPD83 Rd, Rs1 + * ~~~ + * + * **Purpose**:\n + * Copy 8-bit data from 32-bit chunks into 4 bytes in a register. + * + * **Description**:\n + * Moves Rs1.B[3][7:0] to Rd.[0][7:0], Rd.[1][7:0], Rd.[2][7:0], Rd.[3][7:0] + * + * **Operations**:\n + * ~~~ + * Rd.W[x][31:0] = CONCAT(Rs1.B[3][7:0], Rs1.B[3][7:0], Rs1.B[3][7:0], Rs1.B[3][7:0]); + * for RV32: x=0 + * ~~~ + * + * \param [in] a unsigned long type of value stored in a + * \return value stored in unsigned long type + */ +__STATIC_FORCEINLINE unsigned long __RV_EXPD83(unsigned long a) +{ + unsigned long result; + __ASM volatile("expd83 %0, %1" : "=r"(result) : "r"(a)); + return result; +} +/* ===== Inline Function End for A11.4. EXPD83 ===== */ +#endif /* __RISCV_XLEN == 32 */ + +#if defined(__RISCV_FEATURE_DSP) && (__RISCV_FEATURE_DSP == 1) +/* XXXXX ARM Compatiable SIMD API XXXXX */ +/** \brief Q setting quad 8-bit saturating addition. */ +#define __QADD8(x, y) __RV_KADD8(x, y) +/** \brief Q setting quad 8-bit saturating subtract. */ +#define __QSUB8(x, y) __RV_KSUB8((x), (y)) +/** \brief Q setting dual 16-bit saturating addition. */ +#define __QADD16(x, y) __RV_KADD16((x), (y)) +/** \brief Dual 16-bit signed addition with halved results. */ +#define __SHADD16(x, y) __RV_RADD16((x), (y)) +/** \brief Q setting dual 16-bit saturating subtract. */ +#define __QSUB16(x, y) __RV_KSUB16((x), (y)) +/** \brief Dual 16-bit signed subtraction with halved results. */ +#define __SHSUB16(x, y) __RV_RSUB16((x), (y)) +/** \brief Q setting dual 16-bit add and subtract with exchange. */ +#define __QASX(x, y) __RV_KCRAS16((x), (y)) +/** \brief Dual 16-bit signed addition and subtraction with halved results.*/ +#define __SHASX(x, y) __RV_RCRAS16((x), (y)) +/** \brief Q setting dual 16-bit subtract and add with exchange. */ +#define __QSAX(x, y) __RV_KCRSA16((x), (y)) +/** \brief Dual 16-bit signed subtraction and addition with halved results.*/ +#define __SHSAX(x, y) __RV_RCRSA16((x), (y)) +/** \brief Dual 16-bit signed multiply with exchange returning difference. */ +#define __SMUSDX(x, y) __RV_SMXDS((y), (x)) +/** \brief Q setting sum of dual 16-bit signed multiply with exchange. */ +__STATIC_FORCEINLINE int32_t __SMUADX (int32_t op1, int32_t op2) +{ + return (int32_t)__RV_KMXDA(op1, op2); +} +/** \brief Q setting saturating add. */ +#define __QADD(x, y) __RV_KADDW((x), (y)) +/** \brief Q setting saturating subtract. */ +#define __QSUB(x, y) __RV_KSUBW((x), (y)) +/** \brief Q setting dual 16-bit signed multiply with single 32-bit accumulator. */ +__STATIC_FORCEINLINE int32_t __SMLAD(int32_t op1, int32_t op2, int32_t op3) +{ + return (int32_t)__RV_KMADA(op3, op1, op2); +} +/** \brief Q setting pre-exchanged dual 16-bit signed multiply with single 32-bit accumulator. */ +__STATIC_FORCEINLINE int32_t __SMLADX(int32_t op1, int32_t op2, int32_t op3) +{ + return (int32_t)__RV_KMAXDA(op3, op1, op2); +} +/** \brief Q setting dual 16-bit signed multiply with exchange subtract with 32-bit accumulate. */ +__STATIC_FORCEINLINE int32_t __SMLSDX(int32_t op1, int32_t op2, int32_t op3) +{ + return (op3 - (int32_t)__RV_SMXDS(op1, op2)); +} +/** \brief Dual 16-bit signed multiply with single 64-bit accumulator. */ +__STATIC_FORCEINLINE int64_t __SMLALD(int32_t op1, int32_t op2, int64_t acc) +{ + return (int64_t)__RV_SMALDA(acc, op1, op2); +} +/** \brief Dual 16-bit signed multiply with exchange with single 64-bit accumulator. */ +__STATIC_FORCEINLINE int64_t __SMLALDX(int32_t op1, int32_t op2, int64_t acc) +{ + return (int64_t)__RV_SMALXDA(acc, op1, op2); +} +/** \brief Q setting sum of dual 16-bit signed multiply. */ +__STATIC_FORCEINLINE int32_t __SMUAD(int32_t op1, int32_t op2) +{ + return (int32_t)__RV_KMDA(op1, op2); +} +/** \brief Dual 16-bit signed multiply returning difference. */ +__STATIC_FORCEINLINE int32_t __SMUSD(int32_t op1, int32_t op2) +{ + return (int32_t)__RV_SMDRS(op1, op2); +} +/** \brief Dual extract 8-bits and sign extend each to 16-bits. */ +#define __SXTB16(x) __RV_SUNPKD820(x) +/** \brief Dual extracted 8-bit to 16-bit signed addition. TODO Need test */ +__STATIC_FORCEINLINE int32_t __SXTAB16(uint32_t op1, uint32_t op2) +{ + return __RV_ADD16(op1, __RV_SUNPKD830(op2)); +} +/** \brief 32-bit signed multiply with 32-bit truncated accumulator. */ +__STATIC_FORCEINLINE int32_t __SMMLA(int32_t op1, int32_t op2, int32_t op3) +{ + int32_t mul; + mul = (int32_t)__RV_SMMUL(op1, op2); + return (op3 + mul); +} +#define __DKHM8 __RV_DKHM8 +#define __DKHM16 __RV_DKHM16 +#define __DKSUB16 __RV_DKSUB16 +#define __SMAQA __RV_SMAQA +#define __MULSR64 __RV_MULSR64 +#define __DQADD8 __RV_DKADD8 +#define __DQSUB8 __RV_DKSUB8 +#define __DKADD16 __RV_DKADD16 +#define __PKBB16 __RV_PKBB16 +#define __DKSLRA16 __RV_DKSLRA16 +#define __DKSLRA8 __RV_DKSLRA8 +#define __KABSW __RV_KABSW +#define __DKABS8 __RV_DKABS8 +#define __DKABS16 __RV_DKABS16 +#define __SMALDA __RV_SMALDA +#define __SMSLDA __RV_SMSLDA +#define __SMALBB __RV_SMALBB +#define __SUB64 __RV_SUB64 +#define __ADD64 __RV_ADD64 +#define __SMBB16 __RV_SMBB16 +#define __SMBT16 __RV_SMBT16 +#define __SMTT16 __RV_SMTT16 +#define __EXPD80 __RV_EXPD80 +#define __SMAX8 __RV_SMAX8 +#define __SMAX16 __RV_SMAX16 +#define __PKTT16 __RV_PKTT16 +#define __KADD16 __RV_KADD16 +#define __SADD16 __RV_ADD16 + +#endif /* (__RISCV_FEATURE_DSP == 1) */ + +#endif /* defined(__DSP_PRESENT) && (__DSP_PRESENT == 1) */ + +/** \brief Halfword packing instruction. Combines bits[15:0] of val1 with bits[31:16] of val2 levitated with the val3. */ +#define __PKHBT(ARG1,ARG2,ARG3) ( ((((uint32_t)(ARG1)) ) & 0x0000FFFFUL) | \ + ((((uint32_t)(ARG2)) << (ARG3)) & 0xFFFF0000UL) ) +/** \brief Halfword packing instruction. Combines bits[31:16] of val1 with bits[15:0] of val2 right-shifted with the val3. */ +#define __PKHTB(ARG1,ARG2,ARG3) ( ((((uint32_t)(ARG1)) ) & 0xFFFF0000UL) | \ + ((((uint32_t)(ARG2)) >> (ARG3)) & 0x0000FFFFUL) ) + +#ifdef __cplusplus +} +#endif + +#endif /* __CORE_FEATURE_DSP__ */ diff --git a/external/arch/riscv/nuclei/NMSIS/Core/Include/core_feature_eclic.h b/external/arch/riscv/nuclei/NMSIS/Core/Include/core_feature_eclic.h new file mode 100644 index 000000000..e2075471a --- /dev/null +++ b/external/arch/riscv/nuclei/NMSIS/Core/Include/core_feature_eclic.h @@ -0,0 +1,897 @@ +/* + * Copyright (c) 2019 Nuclei Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __CORE_FEATURE_ECLIC__ +#define __CORE_FEATURE_ECLIC__ +/*! + * @file core_feature_eclic.h + * @brief ECLIC feature API header file for Nuclei N/NX Core + */ +/* + * ECLIC Feature Configuration Macro: + * 1. __ECLIC_PRESENT: Define whether Enhanced Core Local Interrupt Controller (ECLIC) Unit is present or not + * * 0: Not present + * * 1: Present + * 2. __ECLIC_BASEADDR: Base address of the ECLIC unit. + * 3. ECLIC_GetInfoCtlbits(): Define the number of hardware bits are actually implemented in the clicintctl registers. + * Valid number is 1 - 8. + * 4. __ECLIC_INTNUM : Define the external interrupt number of ECLIC Unit + * + */ +#ifdef __cplusplus + extern "C" { +#endif + +#if defined(__ECLIC_PRESENT) && (__ECLIC_PRESENT == 1) +/** + * \defgroup NMSIS_Core_ECLIC_Registers Register Define and Type Definitions Of ECLIC + * \ingroup NMSIS_Core_Registers + * \brief Type definitions and defines for eclic registers. + * + * @{ + */ + +/** + * \brief Union type to access CLICFG configure register. + */ +typedef union +{ + struct { + uint8_t _reserved0:1; /*!< bit: 0 Overflow condition code flag */ + uint8_t nlbits:4; /*!< bit: 29 Carry condition code flag */ + uint8_t _reserved1:2; /*!< bit: 30 Zero condition code flag */ + uint8_t _reserved2:1; /*!< bit: 31 Negative condition code flag */ + } b; /*!< Structure used for bit access */ + uint8_t w; /*!< Type used for byte access */ +} CLICCFG_Type; + +/** + * \brief Union type to access CLICINFO information register. + */ +typedef union { + struct { + uint32_t numint:13; /*!< bit: 0..12 number of maximum interrupt inputs supported */ + uint32_t version:8; /*!< bit: 13..20 20:17 for architecture version,16:13 for implementation version */ + uint32_t intctlbits:4; /*!< bit: 21..24 specifies how many hardware bits are actually implemented in the clicintctl registers */ + uint32_t _reserved0:8; /*!< bit: 25..31 Reserved */ + } b; /*!< Structure used for bit access */ + uint32_t w; /*!< Type used for word access */ +} CLICINFO_Type; + +/** + * \brief Access to the structure of a vector interrupt controller. + */ +typedef struct { + __IOM uint8_t INTIP; /*!< Offset: 0x000 (R/W) Interrupt set pending register */ + __IOM uint8_t INTIE; /*!< Offset: 0x001 (R/W) Interrupt set enable register */ + __IOM uint8_t INTATTR; /*!< Offset: 0x002 (R/W) Interrupt set attributes register */ + __IOM uint8_t INTCTRL; /*!< Offset: 0x003 (R/W) Interrupt configure register */ +} CLIC_CTRL_Type; + +typedef struct { + __IOM uint8_t CFG; /*!< Offset: 0x000 (R/W) CLIC configuration register */ + uint8_t RESERVED0[3]; + __IM uint32_t INFO; /*!< Offset: 0x004 (R/ ) CLIC information register */ + uint8_t RESERVED1[3]; + __IOM uint8_t MTH; /*!< Offset: 0x00B (R/W) CLIC machine mode threshold register */ + uint32_t RESERVED2[0x3FD]; + CLIC_CTRL_Type CTRL[4096]; /*!< Offset: 0x1000 (R/W) CLIC register structure for INTIP, INTIE, INTATTR, INTCTL */ +} CLIC_Type; + +#define CLIC_CLICCFG_NLBIT_Pos 1U /*!< CLIC CLICCFG: NLBIT Position */ +#define CLIC_CLICCFG_NLBIT_Msk (0xFUL << CLIC_CLICCFG_NLBIT_Pos) /*!< CLIC CLICCFG: NLBIT Mask */ + +#define CLIC_CLICINFO_CTLBIT_Pos 21U /*!< CLIC INTINFO: __ECLIC_GetInfoCtlbits() Position */ +#define CLIC_CLICINFO_CTLBIT_Msk (0xFUL << CLIC_CLICINFO_CTLBIT_Pos) /*!< CLIC INTINFO: __ECLIC_GetInfoCtlbits() Mask */ + +#define CLIC_CLICINFO_VER_Pos 13U /*!< CLIC CLICINFO: VERSION Position */ +#define CLIC_CLICINFO_VER_Msk (0xFFUL << CLIC_CLICCFG_NLBIT_Pos) /*!< CLIC CLICINFO: VERSION Mask */ + +#define CLIC_CLICINFO_NUM_Pos 0U /*!< CLIC CLICINFO: NUM Position */ +#define CLIC_CLICINFO_NUM_Msk (0xFFFUL << CLIC_CLICINFO_NUM_Pos) /*!< CLIC CLICINFO: NUM Mask */ + +#define CLIC_INTIP_IP_Pos 0U /*!< CLIC INTIP: IP Position */ +#define CLIC_INTIP_IP_Msk (0x1UL << CLIC_INTIP_IP_Pos) /*!< CLIC INTIP: IP Mask */ + +#define CLIC_INTIE_IE_Pos 0U /*!< CLIC INTIE: IE Position */ +#define CLIC_INTIE_IE_Msk (0x1UL << CLIC_INTIE_IE_Pos) /*!< CLIC INTIE: IE Mask */ + +#define CLIC_INTATTR_TRIG_Pos 1U /*!< CLIC INTATTR: TRIG Position */ +#define CLIC_INTATTR_TRIG_Msk (0x3UL << CLIC_INTATTR_TRIG_Pos) /*!< CLIC INTATTR: TRIG Mask */ + +#define CLIC_INTATTR_SHV_Pos 0U /*!< CLIC INTATTR: SHV Position */ +#define CLIC_INTATTR_SHV_Msk (0x1UL << CLIC_INTATTR_SHV_Pos) /*!< CLIC INTATTR: SHV Mask */ + +#define ECLIC_MAX_NLBITS 8U /*!< Max nlbit of the CLICINTCTLBITS */ +#define ECLIC_MODE_MTVEC_Msk 3U /*!< ECLIC Mode mask for MTVT CSR Register */ + +#define ECLIC_NON_VECTOR_INTERRUPT 0x0 /*!< Non-Vector Interrupt Mode of ECLIC */ +#define ECLIC_VECTOR_INTERRUPT 0x1 /*!< Vector Interrupt Mode of ECLIC */ + +/**\brief ECLIC Trigger Enum for different Trigger Type */ +typedef enum ECLIC_TRIGGER { + ECLIC_LEVEL_TRIGGER = 0x0, /*!< Level Triggerred, trig[0] = 0 */ + ECLIC_POSTIVE_EDGE_TRIGGER = 0x1, /*!< Postive/Rising Edge Triggered, trig[1] = 1, trig[0] = 0 */ + ECLIC_NEGTIVE_EDGE_TRIGGER = 0x3, /*!< Negtive/Falling Edge Triggered, trig[1] = 1, trig[0] = 0 */ + ECLIC_MAX_TRIGGER = 0x3 /*!< MAX Supported Trigger Mode */ +} ECLIC_TRIGGER_Type; + +#ifndef __ECLIC_BASEADDR +/* Base address of ECLIC(__ECLIC_BASEADDR) should be defined in */ +#error "__ECLIC_BASEADDR is not defined, please check!" +#endif + +#ifndef __ECLIC_INTCTLBITS +/* Define __ECLIC_INTCTLBITS to get via ECLIC->INFO if not defined */ +#define __ECLIC_INTCTLBITS (__ECLIC_GetInfoCtlbits()) +#endif + +/* ECLIC Memory mapping of Device */ +#define ECLIC_BASE __ECLIC_BASEADDR /*!< ECLIC Base Address */ +#define ECLIC ((CLIC_Type *) ECLIC_BASE) /*!< CLIC configuration struct */ + +/** @} */ /* end of group NMSIS_Core_ECLIC_Registers */ + +/* ########################## ECLIC functions #################################### */ +/** + * \defgroup NMSIS_Core_IntExc Interrupts and Exceptions + * \brief Functions that manage interrupts and exceptions via the ECLIC. + * + * @{ + */ + +/** + * \brief Definition of IRQn numbers + * \details + * The core interrupt enumeration names for IRQn values are defined in the file .h. + * - Interrupt ID(IRQn) from 0 to 18 are reserved for core internal interrupts. + * - Interrupt ID(IRQn) start from 19 represent device-specific external interrupts. + * - The first device-specific interrupt has the IRQn value 19. + * + * The table below describes the core interrupt names and their availability in various Nuclei Cores. + */ +/* The following enum IRQn definition in this file + * is only used for doxygen documentation generation, + * The .h is the real file to define it by vendor + */ +#if defined(__ONLY_FOR_DOXYGEN_DOCUMENT_GENERATION__) +typedef enum IRQn { + /* ========= Nuclei N/NX Core Specific Interrupt Numbers =========== */ + /* Core Internal Interrupt IRQn definitions */ + Reserved0_IRQn = 0, /*!< Internal reserved */ + Reserved1_IRQn = 1, /*!< Internal reserved */ + Reserved2_IRQn = 2, /*!< Internal reserved */ + SysTimerSW_IRQn = 3, /*!< System Timer SW interrupt */ + Reserved3_IRQn = 4, /*!< Internal reserved */ + Reserved4_IRQn = 5, /*!< Internal reserved */ + Reserved5_IRQn = 6, /*!< Internal reserved */ + SysTimer_IRQn = 7, /*!< System Timer Interrupt */ + Reserved6_IRQn = 8, /*!< Internal reserved */ + Reserved7_IRQn = 9, /*!< Internal reserved */ + Reserved8_IRQn = 10, /*!< Internal reserved */ + Reserved9_IRQn = 11, /*!< Internal reserved */ + Reserved10_IRQn = 12, /*!< Internal reserved */ + Reserved11_IRQn = 13, /*!< Internal reserved */ + Reserved12_IRQn = 14, /*!< Internal reserved */ + Reserved13_IRQn = 15, /*!< Internal reserved */ + Reserved14_IRQn = 16, /*!< Internal reserved */ + Reserved15_IRQn = 17, /*!< Internal reserved */ + Reserved16_IRQn = 18, /*!< Internal reserved */ + + /* ========= Device Specific Interrupt Numbers =================== */ + /* ToDo: add here your device specific external interrupt numbers. + * 19~max(NUM_INTERRUPT, 1023) is reserved number for user. + * Maxmum interrupt supported could get from clicinfo.NUM_INTERRUPT. + * According the interrupt handlers defined in startup_Device.S + * eg.: Interrupt for Timer#1 eclic_tim1_handler -> TIM1_IRQn */ + FirstDeviceSpecificInterrupt_IRQn = 19, /*!< First Device Specific Interrupt */ + SOC_INT_MAX, /*!< Number of total interrupts */ +} IRQn_Type; +#endif /* __ONLY_FOR_DOXYGEN_DOCUMENT_GENERATION__ */ + +#ifdef NMSIS_ECLIC_VIRTUAL + #ifndef NMSIS_ECLIC_VIRTUAL_HEADER_FILE + #define NMSIS_ECLIC_VIRTUAL_HEADER_FILE "nmsis_eclic_virtual.h" + #endif + #include NMSIS_ECLIC_VIRTUAL_HEADER_FILE +#else + #define ECLIC_SetCfgNlbits __ECLIC_SetCfgNlbits + #define ECLIC_GetCfgNlbits __ECLIC_GetCfgNlbits + #define ECLIC_GetInfoVer __ECLIC_GetInfoVer + #define ECLIC_GetInfoCtlbits __ECLIC_GetInfoCtlbits + #define ECLIC_GetInfoNum __ECLIC_GetInfoNum + #define ECLIC_SetMth __ECLIC_SetMth + #define ECLIC_GetMth __ECLIC_GetMth + #define ECLIC_EnableIRQ __ECLIC_EnableIRQ + #define ECLIC_GetEnableIRQ __ECLIC_GetEnableIRQ + #define ECLIC_DisableIRQ __ECLIC_DisableIRQ + #define ECLIC_SetPendingIRQ __ECLIC_SetPendingIRQ + #define ECLIC_GetPendingIRQ __ECLIC_GetPendingIRQ + #define ECLIC_ClearPendingIRQ __ECLIC_ClearPendingIRQ + #define ECLIC_SetTrigIRQ __ECLIC_SetTrigIRQ + #define ECLIC_GetTrigIRQ __ECLIC_GetTrigIRQ + #define ECLIC_SetShvIRQ __ECLIC_SetShvIRQ + #define ECLIC_GetShvIRQ __ECLIC_GetShvIRQ + #define ECLIC_SetCtrlIRQ __ECLIC_SetCtrlIRQ + #define ECLIC_GetCtrlIRQ __ECLIC_GetCtrlIRQ + #define ECLIC_SetLevelIRQ __ECLIC_SetLevelIRQ + #define ECLIC_GetLevelIRQ __ECLIC_GetLevelIRQ + #define ECLIC_SetPriorityIRQ __ECLIC_SetPriorityIRQ + #define ECLIC_GetPriorityIRQ __ECLIC_GetPriorityIRQ + +#endif /* NMSIS_ECLIC_VIRTUAL */ + +#ifdef NMSIS_VECTAB_VIRTUAL + #ifndef NMSIS_VECTAB_VIRTUAL_HEADER_FILE + #define NMSIS_VECTAB_VIRTUAL_HEADER_FILE "nmsis_vectab_virtual.h" + #endif + #include NMSIS_VECTAB_VIRTUAL_HEADER_FILE +#else + #define ECLIC_SetVector __ECLIC_SetVector + #define ECLIC_GetVector __ECLIC_GetVector +#endif /* (NMSIS_VECTAB_VIRTUAL) */ + +/** + * \brief Set nlbits value + * \details + * This function set the nlbits value of CLICCFG register. + * \param [in] nlbits nlbits value + * \remarks + * - nlbits is used to set the width of level in the CLICINTCTL[i]. + * \sa + * - \ref ECLIC_GetCfgNlbits + */ +__STATIC_FORCEINLINE void __ECLIC_SetCfgNlbits(uint32_t nlbits) +{ + ECLIC->CFG &= ~CLIC_CLICCFG_NLBIT_Msk; + ECLIC->CFG |= (uint8_t)((nlbits <CFG & CLIC_CLICCFG_NLBIT_Msk) >> CLIC_CLICCFG_NLBIT_Pos)); +} + +/** + * \brief Get the ECLIC version number + * \details + * This function gets the hardware version information from CLICINFO register. + * \return hardware version number in CLICINFO register. + * \remarks + * - This function gets harware version information from CLICINFO register. + * - Bit 20:17 for architecture version, bit 16:13 for implementation version. + * \sa + * - \ref ECLIC_GetInfoNum +*/ +__STATIC_FORCEINLINE uint32_t __ECLIC_GetInfoVer(void) +{ + return ((uint32_t)((ECLIC->INFO & CLIC_CLICINFO_VER_Msk) >> CLIC_CLICINFO_VER_Pos)); +} + +/** + * \brief Get CLICINTCTLBITS + * \details + * This function gets CLICINTCTLBITS from CLICINFO register. + * \return CLICINTCTLBITS from CLICINFO register. + * \remarks + * - In the CLICINTCTL[i] registers, with 2 <= CLICINTCTLBITS <= 8. + * - The implemented bits are kept left-justified in the most-significant bits of each 8-bit + * CLICINTCTL[I] register, with the lower unimplemented bits treated as hardwired to 1. + * \sa + * - \ref ECLIC_GetInfoNum + */ +__STATIC_FORCEINLINE uint32_t __ECLIC_GetInfoCtlbits(void) +{ + return ((uint32_t)((ECLIC->INFO & CLIC_CLICINFO_CTLBIT_Msk) >> CLIC_CLICINFO_CTLBIT_Pos)); +} + +/** + * \brief Get number of maximum interrupt inputs supported + * \details + * This function gets number of maximum interrupt inputs supported from CLICINFO register. + * \return number of maximum interrupt inputs supported from CLICINFO register. + * \remarks + * - This function gets number of maximum interrupt inputs supported from CLICINFO register. + * - The num_interrupt field specifies the actual number of maximum interrupt inputs supported in this implementation. + * \sa + * - \ref ECLIC_GetInfoCtlbits + */ +__STATIC_FORCEINLINE uint32_t __ECLIC_GetInfoNum(void) +{ + return ((uint32_t)((ECLIC->INFO & CLIC_CLICINFO_NUM_Msk) >> CLIC_CLICINFO_NUM_Pos)); +} + +/** + * \brief Set Machine Mode Interrupt Level Threshold + * \details + * This function sets machine mode interrupt level threshold. + * \param [in] mth Interrupt Level Threshold. + * \sa + * - \ref ECLIC_GetMth + */ +__STATIC_FORCEINLINE void __ECLIC_SetMth(uint8_t mth) +{ + ECLIC->MTH = mth; +} + +/** + * \brief Get Machine Mode Interrupt Level Threshold + * \details + * This function gets machine mode interrupt level threshold. + * \return Interrupt Level Threshold. + * \sa + * - \ref ECLIC_SetMth + */ +__STATIC_FORCEINLINE uint8_t __ECLIC_GetMth(void) +{ + return (ECLIC->MTH); +} + + +/** + * \brief Enable a specific interrupt + * \details + * This function enables the specific interrupt \em IRQn. + * \param [in] IRQn Interrupt number + * \remarks + * - IRQn must not be negative. + * \sa + * - \ref ECLIC_DisableIRQ + */ +__STATIC_FORCEINLINE void __ECLIC_EnableIRQ(IRQn_Type IRQn) +{ + ECLIC->CTRL[IRQn].INTIE |= CLIC_INTIE_IE_Msk; +} + +/** + * \brief Get a specific interrupt enable status + * \details + * This function returns the interrupt enable status for the specific interrupt \em IRQn. + * \param [in] IRQn Interrupt number + * \returns + * - 0 Interrupt is not enabled + * - 1 Interrupt is pending + * \remarks + * - IRQn must not be negative. + * \sa + * - \ref ECLIC_EnableIRQ + * - \ref ECLIC_DisableIRQ + */ +__STATIC_FORCEINLINE uint32_t __ECLIC_GetEnableIRQ(IRQn_Type IRQn) +{ + return((uint32_t) (ECLIC->CTRL[IRQn].INTIE) & CLIC_INTIE_IE_Msk); +} + +/** + * \brief Disable a specific interrupt + * \details + * This function disables the specific interrupt \em IRQn. + * \param [in] IRQn Number of the external interrupt to disable + * \remarks + * - IRQn must not be negative. + * \sa + * - \ref ECLIC_EnableIRQ + */ +__STATIC_FORCEINLINE void __ECLIC_DisableIRQ(IRQn_Type IRQn) +{ + ECLIC->CTRL[IRQn].INTIE &= ~CLIC_INTIE_IE_Msk; +} + +/** + * \brief Get the pending specific interrupt + * \details + * This function returns the pending status of the specific interrupt \em IRQn. + * \param [in] IRQn Interrupt number + * \returns + * - 0 Interrupt is not pending + * - 1 Interrupt is pending + * \remarks + * - IRQn must not be negative. + * \sa + * - \ref ECLIC_SetPendingIRQ + * - \ref ECLIC_ClearPendingIRQ + */ +__STATIC_FORCEINLINE int32_t __ECLIC_GetPendingIRQ(IRQn_Type IRQn) +{ + return((uint32_t)(ECLIC->CTRL[IRQn].INTIP) & CLIC_INTIP_IP_Msk); +} + +/** + * \brief Set a specific interrupt to pending + * \details + * This function sets the pending bit for the specific interrupt \em IRQn. + * \param [in] IRQn Interrupt number + * \remarks + * - IRQn must not be negative. + * \sa + * - \ref ECLIC_GetPendingIRQ + * - \ref ECLIC_ClearPendingIRQ + */ +__STATIC_FORCEINLINE void __ECLIC_SetPendingIRQ(IRQn_Type IRQn) +{ + ECLIC->CTRL[IRQn].INTIP |= CLIC_INTIP_IP_Msk; +} + +/** + * \brief Clear a specific interrupt from pending + * \details + * This function removes the pending state of the specific interrupt \em IRQn. + * \em IRQn cannot be a negative number. + * \param [in] IRQn Interrupt number + * \remarks + * - IRQn must not be negative. + * \sa + * - \ref ECLIC_SetPendingIRQ + * - \ref ECLIC_GetPendingIRQ + */ +__STATIC_FORCEINLINE void __ECLIC_ClearPendingIRQ(IRQn_Type IRQn) +{ + ECLIC->CTRL[IRQn].INTIP &= ~ CLIC_INTIP_IP_Msk; +} + +/** + * \brief Set trigger mode and polarity for a specific interrupt + * \details + * This function set trigger mode and polarity of the specific interrupt \em IRQn. + * \param [in] IRQn Interrupt number + * \param [in] trig + * - 00 level trigger, \ref ECLIC_LEVEL_TRIGGER + * - 01 positive edge trigger, \ref ECLIC_POSTIVE_EDGE_TRIGGER + * - 02 level trigger, \ref ECLIC_LEVEL_TRIGGER + * - 03 negative edge trigger, \ref ECLIC_NEGTIVE_EDGE_TRIGGER + * \remarks + * - IRQn must not be negative. + * + * \sa + * - \ref ECLIC_GetTrigIRQ + */ +__STATIC_FORCEINLINE void __ECLIC_SetTrigIRQ(IRQn_Type IRQn, uint32_t trig) +{ + ECLIC->CTRL[IRQn].INTATTR &= ~CLIC_INTATTR_TRIG_Msk; + ECLIC->CTRL[IRQn].INTATTR |= (uint8_t)(trig<CTRL[IRQn].INTATTR) & CLIC_INTATTR_TRIG_Msk)>>CLIC_INTATTR_TRIG_Pos)); +} + +/** + * \brief Set interrupt working mode for a specific interrupt + * \details + * This function set selective hardware vector or non-vector working mode of the specific interrupt \em IRQn. + * \param [in] IRQn Interrupt number + * \param [in] shv + * - 0 non-vector mode, \ref ECLIC_NON_VECTOR_INTERRUPT + * - 1 vector mode, \ref ECLIC_VECTOR_INTERRUPT + * \remarks + * - IRQn must not be negative. + * \sa + * - \ref ECLIC_GetShvIRQ + */ +__STATIC_FORCEINLINE void __ECLIC_SetShvIRQ(IRQn_Type IRQn, uint32_t shv) +{ + ECLIC->CTRL[IRQn].INTATTR &= ~CLIC_INTATTR_SHV_Msk; + ECLIC->CTRL[IRQn].INTATTR |= (uint8_t)(shv<CTRL[IRQn].INTATTR) & CLIC_INTATTR_SHV_Msk)>>CLIC_INTATTR_SHV_Pos)); +} + +/** + * \brief Modify ECLIC Interrupt Input Control Register for a specific interrupt + * \details + * This function modify ECLIC Interrupt Input Control(CLICINTCTL[i]) register of the specific interrupt \em IRQn. + * \param [in] IRQn Interrupt number + * \param [in] intctrl Set value for CLICINTCTL[i] register + * \remarks + * - IRQn must not be negative. + * \sa + * - \ref ECLIC_GetCtrlIRQ + */ +__STATIC_FORCEINLINE void __ECLIC_SetCtrlIRQ(IRQn_Type IRQn, uint8_t intctrl) +{ + ECLIC->CTRL[IRQn].INTCTRL = intctrl; +} + +/** + * \brief Get ECLIC Interrupt Input Control Register value for a specific interrupt + * \details + * This function modify ECLIC Interrupt Input Control register of the specific interrupt \em IRQn. + * \param [in] IRQn Interrupt number + * \return value of ECLIC Interrupt Input Control register + * \remarks + * - IRQn must not be negative. + * \sa + * - \ref ECLIC_SetCtrlIRQ + */ +__STATIC_FORCEINLINE uint8_t __ECLIC_GetCtrlIRQ(IRQn_Type IRQn) +{ + return (ECLIC->CTRL[IRQn].INTCTRL); +} + +/** + * \brief Set ECLIC Interrupt level of a specific interrupt + * \details + * This function set interrupt level of the specific interrupt \em IRQn. + * \param [in] IRQn Interrupt number + * \param [in] lvl_abs Interrupt level + * \remarks + * - IRQn must not be negative. + * - If lvl_abs to be set is larger than the max level allowed, it will be force to be max level. + * - When you set level value you need use clciinfo.nlbits to get the width of level. + * Then we could know the maximum of level. CLICINTCTLBITS is how many total bits are + * present in the CLICINTCTL register. + * \sa + * - \ref ECLIC_GetLevelIRQ + */ +__STATIC_FORCEINLINE void __ECLIC_SetLevelIRQ(IRQn_Type IRQn, uint8_t lvl_abs) +{ + uint8_t nlbits = __ECLIC_GetCfgNlbits(); + uint8_t intctlbits = (uint8_t)__ECLIC_INTCTLBITS; + + if (nlbits == 0) { + return; + } + + if (nlbits > intctlbits) { + nlbits = intctlbits; + } + uint8_t maxlvl = ((1 << nlbits) - 1); + if (lvl_abs > maxlvl) { + lvl_abs = maxlvl; + } + uint8_t lvl = lvl_abs << (ECLIC_MAX_NLBITS - nlbits); + uint8_t cur_ctrl = __ECLIC_GetCtrlIRQ(IRQn); + cur_ctrl = cur_ctrl << nlbits; + cur_ctrl = cur_ctrl >> nlbits; + __ECLIC_SetCtrlIRQ(IRQn, (cur_ctrl | lvl)); +} + +/** + * \brief Get ECLIC Interrupt level of a specific interrupt + * \details + * This function get interrupt level of the specific interrupt \em IRQn. + * \param [in] IRQn Interrupt number + * \return Interrupt level + * \remarks + * - IRQn must not be negative. + * \sa + * - \ref ECLIC_SetLevelIRQ + */ +__STATIC_FORCEINLINE uint8_t __ECLIC_GetLevelIRQ(IRQn_Type IRQn) +{ + uint8_t nlbits = __ECLIC_GetCfgNlbits(); + uint8_t intctlbits = (uint8_t)__ECLIC_INTCTLBITS; + + if (nlbits == 0) { + return 0; + } + + if (nlbits > intctlbits) { + nlbits = intctlbits; + } + uint8_t intctrl = __ECLIC_GetCtrlIRQ(IRQn); + uint8_t lvl_abs = intctrl >> (ECLIC_MAX_NLBITS - nlbits); + return lvl_abs; +} + +/** + * \brief Get ECLIC Interrupt priority of a specific interrupt + * \details + * This function get interrupt priority of the specific interrupt \em IRQn. + * \param [in] IRQn Interrupt number + * \param [in] pri Interrupt priority + * \remarks + * - IRQn must not be negative. + * - If pri to be set is larger than the max priority allowed, it will be force to be max priority. + * - Priority width is CLICINTCTLBITS minus clciinfo.nlbits if clciinfo.nlbits + * is less than CLICINTCTLBITS. Otherwise priority width is 0. + * \sa + * - \ref ECLIC_GetPriorityIRQ + */ +__STATIC_FORCEINLINE void __ECLIC_SetPriorityIRQ(IRQn_Type IRQn, uint8_t pri) +{ + uint8_t nlbits = __ECLIC_GetCfgNlbits(); + uint8_t intctlbits = (uint8_t)__ECLIC_INTCTLBITS; + if (nlbits < intctlbits) { + uint8_t maxpri = ((1 << (intctlbits - nlbits)) - 1); + if (pri > maxpri) { + pri = maxpri; + } + pri = pri << (ECLIC_MAX_NLBITS - intctlbits); + uint8_t mask = ((uint8_t)(-1)) >> intctlbits; + pri = pri | mask; + uint8_t cur_ctrl = __ECLIC_GetCtrlIRQ(IRQn); + cur_ctrl = cur_ctrl >> (ECLIC_MAX_NLBITS - nlbits); + cur_ctrl = cur_ctrl << (ECLIC_MAX_NLBITS - nlbits); + __ECLIC_SetCtrlIRQ(IRQn, (cur_ctrl | pri)); + } +} + +/** + * \brief Get ECLIC Interrupt priority of a specific interrupt + * \details + * This function get interrupt priority of the specific interrupt \em IRQn. + * \param [in] IRQn Interrupt number + * \return Interrupt priority + * \remarks + * - IRQn must not be negative. + * \sa + * - \ref ECLIC_SetPriorityIRQ + */ +__STATIC_FORCEINLINE uint8_t __ECLIC_GetPriorityIRQ(IRQn_Type IRQn) +{ + uint8_t nlbits = __ECLIC_GetCfgNlbits(); + uint8_t intctlbits = (uint8_t)__ECLIC_INTCTLBITS; + if (nlbits < intctlbits) { + uint8_t cur_ctrl = __ECLIC_GetCtrlIRQ(IRQn); + uint8_t pri = cur_ctrl << nlbits; + pri = pri >> nlbits; + pri = pri >> (ECLIC_MAX_NLBITS - intctlbits); + return pri; + } else { + return 0; + } +} + +/** + * \brief Set Interrupt Vector of a specific interrupt + * \details + * This function set interrupt handler address of the specific interrupt \em IRQn. + * \param [in] IRQn Interrupt number + * \param [in] vector Interrupt handler address + * \remarks + * - IRQn must not be negative. + * - You can set the \ref CSR_CSR_MTVT to set interrupt vector table entry address. + * - If your vector table is placed in readonly section, the vector for IRQn will not be modified. + * For this case, you need to use the correct irq handler name defined in your vector table as + * your irq handler function name. + * - This function will only work correctly when the vector table is placed in an read-write enabled section. + * \sa + * - \ref ECLIC_GetVector + */ +__STATIC_FORCEINLINE void __ECLIC_SetVector(IRQn_Type IRQn, rv_csr_t vector) +{ +#if __RISCV_XLEN == 32 + volatile uint32_t vec_base; + vec_base = ((uint32_t)__RV_CSR_READ(CSR_MTVT)); + (* (unsigned long *) (vec_base + ((int32_t)IRQn) * 4)) = vector; +#elif __RISCV_XLEN == 64 + volatile uint64_t vec_base; + vec_base = ((uint64_t)__RV_CSR_READ(CSR_MTVT)); + (* (unsigned long *) (vec_base + ((int32_t)IRQn) * 8)) = vector; +#else // TODO Need cover for XLEN=128 case in future + volatile uint64_t vec_base; + vec_base = ((uint64_t)__RV_CSR_READ(CSR_MTVT)); + (* (unsigned long *) (vec_base + ((int32_t)IRQn) * 8)) = vector; +#endif +} + +/** + * \brief Get Interrupt Vector of a specific interrupt + * \details + * This function get interrupt handler address of the specific interrupt \em IRQn. + * \param [in] IRQn Interrupt number + * \return Interrupt handler address + * \remarks + * - IRQn must not be negative. + * - You can read \ref CSR_CSR_MTVT to get interrupt vector table entry address. + * \sa + * - \ref ECLIC_SetVector + */ +__STATIC_FORCEINLINE rv_csr_t __ECLIC_GetVector(IRQn_Type IRQn) +{ +#if __RISCV_XLEN == 32 + return (*(uint32_t *)(__RV_CSR_READ(CSR_MTVT)+IRQn*4)); +#elif __RISCV_XLEN == 64 + return (*(uint64_t *)(__RV_CSR_READ(CSR_MTVT)+IRQn*8)); +#else // TODO Need cover for XLEN=128 case in future + return (*(uint64_t *)(__RV_CSR_READ(CSR_MTVT)+IRQn*8)); +#endif +} + +/** + * \brief Set Exception entry address + * \details + * This function set exception handler address to 'CSR_MTVEC'. + * \param [in] addr Exception handler address + * \remarks + * - This function use to set exception handler address to 'CSR_MTVEC'. Address is 4 bytes align. + * \sa + * - \ref __get_exc_entry + */ +__STATIC_FORCEINLINE void __set_exc_entry(rv_csr_t addr) +{ + addr &= (rv_csr_t)(~0x3F); + addr |= ECLIC_MODE_MTVEC_Msk; + __RV_CSR_WRITE(CSR_MTVEC, addr); +} + +/** + * \brief Get Exception entry address + * \details + * This function get exception handler address from 'CSR_MTVEC'. + * \return Exception handler address + * \remarks + * - This function use to get exception handler address from 'CSR_MTVEC'. Address is 4 bytes align + * \sa + * - \ref __set_exc_entry + */ +__STATIC_FORCEINLINE rv_csr_t __get_exc_entry(void) +{ + unsigned long addr = __RV_CSR_READ(CSR_MTVEC); + return (addr & ~ECLIC_MODE_MTVEC_Msk); +} + +/** + * \brief Set Non-vector interrupt entry address + * \details + * This function set Non-vector interrupt address. + * \param [in] addr Non-vector interrupt entry address + * \remarks + * - This function use to set non-vector interrupt entry address to 'CSR_MTVT2' if + * - CSR_MTVT2 bit0 is 1. If 'CSR_MTVT2' bit0 is 0 then set address to 'CSR_MTVEC' + * \sa + * - \ref __get_nonvec_entry + */ +__STATIC_FORCEINLINE void __set_nonvec_entry(rv_csr_t addr) +{ + if (__RV_CSR_READ(CSR_MTVT2) & 0x1){ + __RV_CSR_WRITE(CSR_MTVT2, addr | 0x01); + } else { + addr &= (rv_csr_t)(~0x3F); + addr |= ECLIC_MODE_MTVEC_Msk; + __RV_CSR_WRITE(CSR_MTVEC, addr); + } +} + +/** + * \brief Get Non-vector interrupt entry address + * \details + * This function get Non-vector interrupt address. + * \return Non-vector interrupt handler address + * \remarks + * - This function use to get non-vector interrupt entry address from 'CSR_MTVT2' if + * - CSR_MTVT2 bit0 is 1. If 'CSR_MTVT2' bit0 is 0 then get address from 'CSR_MTVEC'. + * \sa + * - \ref __set_nonvec_entry + */ +__STATIC_FORCEINLINE rv_csr_t __get_nonvec_entry(void) +{ + if (__RV_CSR_READ(CSR_MTVT2) & 0x1) { + return __RV_CSR_READ(CSR_MTVT2) & (~(rv_csr_t)(0x1)); + } else { + rv_csr_t addr = __RV_CSR_READ(CSR_MTVEC); + return (addr & ~ECLIC_MODE_MTVEC_Msk); + } +} + +/** + * \brief Get NMI interrupt entry from 'CSR_MNVEC' + * \details + * This function get NMI interrupt address from 'CSR_MNVEC'. + * \return NMI interrupt handler address + * \remarks + * - This function use to get NMI interrupt handler address from 'CSR_MNVEC'. If CSR_MMISC_CTL[9] = 1 'CSR_MNVEC' + * - will be equal as mtvec. If CSR_MMISC_CTL[9] = 0 'CSR_MNVEC' will be equal as reset vector. + * - NMI entry is defined via \ref CSR_MMISC_CTL, writing to \ref CSR_MNVEC will be ignored. + */ +__STATIC_FORCEINLINE rv_csr_t __get_nmi_entry(void) +{ + return __RV_CSR_READ(CSR_MNVEC); +} + +/** + * \brief Save necessary CSRs into variables for vector interrupt nesting + * \details + * This macro is used to declare variables which are used for saving + * CSRs(MCAUSE, MEPC, MSUB), and it will read these CSR content into + * these variables, it need to be used in a vector-interrupt if nesting + * is required. + * \remarks + * - Interrupt will be enabled after this macro is called + * - It need to be used together with \ref RESTORE_IRQ_CSR_CONTEXT + * - Don't use variable names __mcause, __mpec, __msubm in your ISR code + * - If you want to enable interrupt nesting feature for vector interrupt, + * you can do it like this: + * \code + * // __INTERRUPT attribute will generates function entry and exit sequences suitable + * // for use in an interrupt handler when this attribute is present + * __INTERRUPT void eclic_mtip_handler(void) + * { + * // Must call this to save CSRs + * SAVE_IRQ_CSR_CONTEXT(); + * // !!!Interrupt is enabled here!!! + * // !!!Higher priority interrupt could nest it!!! + * + * // put you own interrupt handling code here + * + * // Must call this to restore CSRs + * RESTORE_IRQ_CSR_CONTEXT(); + * } + * \endcode + */ +#define SAVE_IRQ_CSR_CONTEXT() \ + rv_csr_t __mcause = __RV_CSR_READ(CSR_MCAUSE); \ + rv_csr_t __mepc = __RV_CSR_READ(CSR_MEPC); \ + rv_csr_t __msubm = __RV_CSR_READ(CSR_MSUBM); \ + __enable_irq(); + +/** + * \brief Restore necessary CSRs from variables for vector interrupt nesting + * \details + * This macro is used restore CSRs(MCAUSE, MEPC, MSUB) from pre-defined variables + * in \ref SAVE_IRQ_CSR_CONTEXT macro. + * \remarks + * - Interrupt will be disabled after this macro is called + * - It need to be used together with \ref SAVE_IRQ_CSR_CONTEXT + */ +#define RESTORE_IRQ_CSR_CONTEXT() \ + __disable_irq(); \ + __RV_CSR_WRITE(CSR_MSUBM, __msubm); \ + __RV_CSR_WRITE(CSR_MEPC, __mepc); \ + __RV_CSR_WRITE(CSR_MCAUSE, __mcause); + +/** @} */ /* End of Doxygen Group NMSIS_Core_IntExc */ + +#endif /* defined(__ECLIC_PRESENT) && (__ECLIC_PRESENT == 1) */ + +#ifdef __cplusplus +} +#endif +#endif /** __CORE_FEATURE_ECLIC__ */ diff --git a/external/arch/riscv/nuclei/NMSIS/Core/Include/core_feature_fpu.h b/external/arch/riscv/nuclei/NMSIS/Core/Include/core_feature_fpu.h new file mode 100644 index 000000000..c9e13b79d --- /dev/null +++ b/external/arch/riscv/nuclei/NMSIS/Core/Include/core_feature_fpu.h @@ -0,0 +1,304 @@ +/* + * Copyright (c) 2019 Nuclei Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __CORE_FEATURE_FPU_H__ +#define __CORE_FEATURE_FPU_H__ +/*! + * @file core_feature_fpu.h + * @brief FPU feature API header file for Nuclei N/NX Core + */ +/* + * FPU Feature Configuration Macro: + * 1. __FPU_PRESENT: Define whether Floating Point Unit(FPU) is present or not + * * 0: Not present + * * 1: Single precision FPU present, __RISCV_FLEN == 32 + * * 2: Double precision FPU present, __RISCV_FLEN == 64 + */ +#ifdef __cplusplus + extern "C" { +#endif + +/* ===== FPU Operations ===== */ +/** + * \defgroup NMSIS_Core_FPU_Functions FPU Functions + * \ingroup NMSIS_Core + * \brief Functions that related to the RISC-V FPU (F and D extension). + * \details + * + * Nuclei provided floating point unit by RISC-V F and D extension. + * * `F extension` adds single-precision floating-point computational + * instructions compliant with the IEEE 754-2008 arithmetic standard, __RISCV_FLEN = 32. + * The F extension adds 32 floating-point registers, f0-f31, each 32 bits wide, + * and a floating-point control and status register fcsr, which contains the + * operating mode and exception status of the floating-point unit. + * * `D extension` adds double-precision floating-point computational instructions + * compliant with the IEEE 754-2008 arithmetic standard. + * The D extension widens the 32 floating-point registers, f0-f31, to 64 bits, __RISCV_FLEN = 64 + * @{ + */ +#if defined(__FPU_PRESENT) && (__FPU_PRESENT > 0) + +#if __FPU_PRESENT == 1 + /** \brief Refer to the width of the floating point register in bits(either 32 or 64) */ + #define __RISCV_FLEN 32 +#elif __FPU_PRESENT == 2 + #define __RISCV_FLEN 64 +#else + #define __RISCV_FLEN __riscv_flen +#endif /* __FPU_PRESENT == 1 */ + +/** \brief Get FCSR CSR Register */ +#define __get_FCSR() __RV_CSR_READ(CSR_FCSR) +/** \brief Set FCSR CSR Register with val */ +#define __set_FCSR(val) __RV_CSR_WRITE(CSR_FCSR, (val)) +/** \brief Get FRM CSR Register */ +#define __get_FRM() __RV_CSR_READ(CSR_FRM) +/** \brief Set FRM CSR Register with val */ +#define __set_FRM(val) __RV_CSR_WRITE(CSR_FRM, (val)) +/** \brief Get FFLAGS CSR Register */ +#define __get_FFLAGS() __RV_CSR_READ(CSR_FFLAGS) +/** \brief Set FFLAGS CSR Register with val */ +#define __set_FFLAGS(val) __RV_CSR_WRITE(CSR_FFLAGS, (val)) + +/** \brief Enable FPU Unit */ +#define __enable_FPU() __RV_CSR_SET(CSR_MSTATUS, MSTATUS_FS) +/** + * \brief Disable FPU Unit + * \details + * * We can save power by disable FPU Unit. + * * When FPU Unit is disabled, any access to FPU related CSR registers + * and FPU instructions will cause illegal Instuction Exception. + * */ +#define __disable_FPU() __RV_CSR_CLEAR(CSR_MSTATUS, MSTATUS_FS) + + +/** + * \brief Load a single-precision value from memory into float point register freg using flw instruction + * \details The FLW instruction loads a single-precision floating point value from memory + * address (addr + ofs) into floating point register freg(f0-f31) + * \param [in] freg The floating point register, eg. FREG(0), f0 + * \param [in] addr The memory base address, 4 byte aligned required + * \param [in] ofs a 12-bit immediate signed byte offset value, should be an const value + * \remarks + * * FLW and FSW operations need to make sure the address is 4 bytes aligned, + * otherwise it will cause exception code 4(Load address misaligned) or 6 (Store/AMO address misaligned) + * * FLW and FSW do not modify the bits being transferred; in particular, the payloads of non-canonical + * NaNs are preserved + * + */ +#define __RV_FLW(freg, addr, ofs) \ + ({ \ + register rv_csr_t __addr = (rv_csr_t)(addr); \ + __ASM volatile("flw " STRINGIFY(freg) ", %0(%1) " \ + : : "I"(ofs), "r"(__addr) \ + : "memory"); \ + }) + +/** + * \brief Store a single-precision value from float point freg into memory using fsw instruction + * \details The FSW instruction stores a single-precision value from floating point register to memory + * \param [in] freg The floating point register(f0-f31), eg. FREG(0), f0 + * \param [in] addr The memory base address, 4 byte aligned required + * \param [in] ofs a 12-bit immediate signed byte offset value, should be an const value + * \remarks + * * FLW and FSW operations need to make sure the address is 4 bytes aligned, + * otherwise it will cause exception code 4(Load address misaligned) or 6 (Store/AMO address misaligned) + * * FLW and FSW do not modify the bits being transferred; in particular, the payloads of non-canonical + * NaNs are preserved + * + */ +#define __RV_FSW(freg, addr, ofs) \ + ({ \ + register rv_csr_t __addr = (rv_csr_t)(addr); \ + __ASM volatile("fsw " STRINGIFY(freg) ", %0(%1) " \ + : : "I"(ofs), "r"(__addr) \ + : "memory"); \ + }) + +/** + * \brief Load a double-precision value from memory into float point register freg using fld instruction + * \details The FLD instruction loads a double-precision floating point value from memory + * address (addr + ofs) into floating point register freg(f0-f31) + * \param [in] freg The floating point register, eg. FREG(0), f0 + * \param [in] addr The memory base address, 8 byte aligned required + * \param [in] ofs a 12-bit immediate signed byte offset value, should be an const value + * \attention + * * Function only available for double precision floating point unit, FLEN = 64 + * \remarks + * * FLD and FSD operations need to make sure the address is 8 bytes aligned, + * otherwise it will cause exception code 4(Load address misaligned) or 6 (Store/AMO address misaligned) + * * FLD and FSD do not modify the bits being transferred; in particular, the payloads of non-canonical + * NaNs are preserved. + */ +#define __RV_FLD(freg, addr, ofs) \ + ({ \ + register rv_csr_t __addr = (rv_csr_t)(addr); \ + __ASM volatile("fld " STRINGIFY(freg) ", %0(%1) " \ + : : "I"(ofs), "r"(__addr) \ + : "memory"); \ + }) + +/** + * \brief Store a double-precision value from float point freg into memory using fsd instruction + * \details The FSD instruction stores double-precision value from floating point register to memory + * \param [in] freg The floating point register(f0-f31), eg. FREG(0), f0 + * \param [in] addr The memory base address, 8 byte aligned required + * \param [in] ofs a 12-bit immediate signed byte offset value, should be an const value + * \attention + * * Function only available for double precision floating point unit, FLEN = 64 + * \remarks + * * FLD and FSD operations need to make sure the address is 8 bytes aligned, + * otherwise it will cause exception code 4(Load address misaligned) or 6 (Store/AMO address misaligned) + * * FLD and FSD do not modify the bits being transferred; in particular, the payloads of non-canonical + * NaNs are preserved. + * + */ +#define __RV_FSD(freg, addr, ofs) \ + ({ \ + register rv_csr_t __addr = (rv_csr_t)(addr); \ + __ASM volatile("fsd " STRINGIFY(freg) ", %0(%1) " \ + : : "I"(ofs), "r"(__addr) \ + : "memory"); \ + }) + +/** + * \def __RV_FLOAD + * \brief Load a float point value from memory into float point register freg using flw/fld instruction + * \details + * * For Single-Precison Floating-Point Mode(__FPU_PRESENT == 1, __RISCV_FLEN == 32): + * It will call \ref __RV_FLW to load a single-precision floating point value from memory to floating point register + * * For Double-Precison Floating-Point Mode(__FPU_PRESENT == 2, __RISCV_FLEN == 64): + * It will call \ref __RV_FLD to load a double-precision floating point value from memory to floating point register + * + * \attention + * Function behaviour is different for __FPU_PRESENT = 1 or 2, please see the real function this macro represent + */ +/** + * \def __RV_FSTORE + * \brief Store a float value from float point freg into memory using fsw/fsd instruction + * \details + * * For Single-Precison Floating-Point Mode(__FPU_PRESENT == 1, __RISCV_FLEN == 32): + * It will call \ref __RV_FSW to store floating point register into memory + * * For Double-Precison Floating-Point Mode(__FPU_PRESENT == 2, __RISCV_FLEN == 64): + * It will call \ref __RV_FSD to store floating point register into memory + * + * \attention + * Function behaviour is different for __FPU_PRESENT = 1 or 2, please see the real function this macro represent + */ +#if __FPU_PRESENT == 1 +#define __RV_FLOAD __RV_FLW +#define __RV_FSTORE __RV_FSW +/** \brief Type of FPU register, depends on the FLEN defined in RISC-V */ +typedef uint32_t rv_fpu_t; +#elif __FPU_PRESENT == 2 +#define __RV_FLOAD __RV_FLD +#define __RV_FSTORE __RV_FSD +/** \brief Type of FPU register, depends on the FLEN defined in RISC-V */ +typedef uint64_t rv_fpu_t; +#endif /* __FPU_PRESENT == 2 */ + +/** + * \brief Save FPU context into variables for interrupt nesting + * \details + * This macro is used to declare variables which are used for saving + * FPU context, and it will store the nessary fpu registers into + * these variables, it need to be used in a interrupt when in this + * interrupt fpu registers are used. + * \remarks + * - It need to be used together with \ref RESTORE_FPU_CONTEXT + * - Don't use variable names __fpu_context in your ISR code + * - If you isr code will use fpu registers, and this interrupt is nested. + * Then you can do it like this: + * \code + * void eclic_mtip_handler(void) + * { + * // !!!Interrupt is enabled here!!! + * // !!!Higher priority interrupt could nest it!!! + * + * // Necessary only when you need to use fpu registers + * // in this isr handler functions + * SAVE_FPU_CONTEXT(); + * + * // put you own interrupt handling code here + * + * // pair of SAVE_FPU_CONTEXT() + * RESTORE_FPU_CONTEXT(); + * } + * \endcode + */ +#define SAVE_FPU_CONTEXT() \ + rv_fpu_t __fpu_context[20]; \ + __RV_FSTORE(FREG(0), __fpu_context, 0 << LOG_FPREGBYTES); \ + __RV_FSTORE(FREG(1), __fpu_context, 1 << LOG_FPREGBYTES); \ + __RV_FSTORE(FREG(2), __fpu_context, 2 << LOG_FPREGBYTES); \ + __RV_FSTORE(FREG(3), __fpu_context, 3 << LOG_FPREGBYTES); \ + __RV_FSTORE(FREG(4), __fpu_context, 4 << LOG_FPREGBYTES); \ + __RV_FSTORE(FREG(5), __fpu_context, 5 << LOG_FPREGBYTES); \ + __RV_FSTORE(FREG(6), __fpu_context, 6 << LOG_FPREGBYTES); \ + __RV_FSTORE(FREG(7), __fpu_context, 7 << LOG_FPREGBYTES); \ + __RV_FSTORE(FREG(10), __fpu_context, 8 << LOG_FPREGBYTES); \ + __RV_FSTORE(FREG(11), __fpu_context, 9 << LOG_FPREGBYTES); \ + __RV_FSTORE(FREG(12), __fpu_context, 10 << LOG_FPREGBYTES); \ + __RV_FSTORE(FREG(13), __fpu_context, 11 << LOG_FPREGBYTES); \ + __RV_FSTORE(FREG(14), __fpu_context, 12 << LOG_FPREGBYTES); \ + __RV_FSTORE(FREG(15), __fpu_context, 13 << LOG_FPREGBYTES); \ + __RV_FSTORE(FREG(16), __fpu_context, 14 << LOG_FPREGBYTES); \ + __RV_FSTORE(FREG(17), __fpu_context, 15 << LOG_FPREGBYTES); \ + __RV_FSTORE(FREG(28), __fpu_context, 16 << LOG_FPREGBYTES); \ + __RV_FSTORE(FREG(29), __fpu_context, 17 << LOG_FPREGBYTES); \ + __RV_FSTORE(FREG(30), __fpu_context, 18 << LOG_FPREGBYTES); \ + __RV_FSTORE(FREG(31), __fpu_context, 19 << LOG_FPREGBYTES); + +/** + * \brief Restore necessary fpu registers from variables for interrupt nesting + * \details + * This macro is used restore necessary fpu registers from pre-defined variables + * in \ref SAVE_FPU_CONTEXT macro. + * \remarks + * - It need to be used together with \ref SAVE_FPU_CONTEXT + */ +#define RESTORE_FPU_CONTEXT() \ + __RV_FLOAD(FREG(0), __fpu_context, 0 << LOG_FPREGBYTES); \ + __RV_FLOAD(FREG(1), __fpu_context, 1 << LOG_FPREGBYTES); \ + __RV_FLOAD(FREG(2), __fpu_context, 2 << LOG_FPREGBYTES); \ + __RV_FLOAD(FREG(3), __fpu_context, 3 << LOG_FPREGBYTES); \ + __RV_FLOAD(FREG(4), __fpu_context, 4 << LOG_FPREGBYTES); \ + __RV_FLOAD(FREG(5), __fpu_context, 5 << LOG_FPREGBYTES); \ + __RV_FLOAD(FREG(6), __fpu_context, 6 << LOG_FPREGBYTES); \ + __RV_FLOAD(FREG(7), __fpu_context, 7 << LOG_FPREGBYTES); \ + __RV_FLOAD(FREG(10), __fpu_context, 8 << LOG_FPREGBYTES); \ + __RV_FLOAD(FREG(11), __fpu_context, 9 << LOG_FPREGBYTES); \ + __RV_FLOAD(FREG(12), __fpu_context, 10 << LOG_FPREGBYTES); \ + __RV_FLOAD(FREG(13), __fpu_context, 11 << LOG_FPREGBYTES); \ + __RV_FLOAD(FREG(14), __fpu_context, 12 << LOG_FPREGBYTES); \ + __RV_FLOAD(FREG(15), __fpu_context, 13 << LOG_FPREGBYTES); \ + __RV_FLOAD(FREG(16), __fpu_context, 14 << LOG_FPREGBYTES); \ + __RV_FLOAD(FREG(17), __fpu_context, 15 << LOG_FPREGBYTES); \ + __RV_FLOAD(FREG(28), __fpu_context, 16 << LOG_FPREGBYTES); \ + __RV_FLOAD(FREG(29), __fpu_context, 17 << LOG_FPREGBYTES); \ + __RV_FLOAD(FREG(30), __fpu_context, 18 << LOG_FPREGBYTES); \ + __RV_FLOAD(FREG(31), __fpu_context, 19 << LOG_FPREGBYTES); +#else +#define SAVE_FPU_CONTEXT() +#define RESTORE_FPU_CONTEXT() +#endif /* __FPU_PRESENT > 0 */ +/** @} */ /* End of Doxygen Group NMSIS_Core_FPU_Functions */ + +#ifdef __cplusplus +} +#endif +#endif /** __RISCV_EXT_FPU_H__ */ diff --git a/external/arch/riscv/nuclei/NMSIS/Core/Include/core_feature_pmp.h b/external/arch/riscv/nuclei/NMSIS/Core/Include/core_feature_pmp.h new file mode 100644 index 000000000..997dfaee1 --- /dev/null +++ b/external/arch/riscv/nuclei/NMSIS/Core/Include/core_feature_pmp.h @@ -0,0 +1,260 @@ +/* + * Copyright (c) 2019 Nuclei Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __CORE_FEATURE_PMP_H__ +#define __CORE_FEATURE_PMP_H__ +/*! + * @file core_feature_pmp.h + * @brief PMP feature API header file for Nuclei N/NX Core + */ +/* + * PMP Feature Configuration Macro: + * 1. __PMP_PRESENT: Define whether Physical Memory Protection(PMP) is present or not + * * 0: Not present + * * 1: Present + * 2. __PMP_ENTRY_NUM: Define the number of PMP entries, only 8 or 16 is configurable. + */ +#ifdef __cplusplus + extern "C" { +#endif + +#if defined(__PMP_PRESENT) && (__PMP_PRESENT == 1) +/* ===== PMP Operations ===== */ +/** + * \defgroup NMSIS_Core_PMP_Functions PMP Functions + * \ingroup NMSIS_Core + * \brief Functions that related to the RISCV Phyiscal Memory Protection. + * \details + * Optional physical memory protection (PMP) unit provides per-hart machine-mode + * control registers to allow physical memory access privileges (read, write, execute) + * to be specified for each physical memory region. + * + * The PMP can supports region access control settings as small as four bytes. + * + * @{ + */ +#ifndef __PMP_ENTRY_NUM +/* numbers of PMP entries(__PMP_ENTRY_NUM) should be defined in */ +#error "__PMP_ENTRY_NUM is not defined, please check!" +#endif + +/** + * \brief Get 8bit PMPxCFG Register by PMP entry index + * \details Return the content of the PMPxCFG Register. + * \param [in] idx PMP region index(0-15) + * \return PMPxCFG Register value + */ +__STATIC_INLINE uint8_t __get_PMPxCFG(uint32_t idx) +{ + rv_csr_t pmpcfg = 0; + + if (idx >= __PMP_ENTRY_NUM) return 0; +#if __RISCV_XLEN == 32 + if (idx < 4) { + pmpcfg = __RV_CSR_READ(CSR_PMPCFG0); + } else if ((idx >=4) && (idx < 8)) { + idx -= 4; + pmpcfg = __RV_CSR_READ(CSR_PMPCFG1); + } else if ((idx >=8) && (idx < 12)) { + idx -= 8; + pmpcfg = __RV_CSR_READ(CSR_PMPCFG2); + } else { + idx -= 12; + pmpcfg = __RV_CSR_READ(CSR_PMPCFG3); + } + + idx = idx << 3; + return (uint8_t)((pmpcfg>>idx) & 0xFF); +#elif __RISCV_XLEN == 64 + if (idx < 8) { + pmpcfg = __RV_CSR_READ(CSR_PMPCFG0); + } else { + idx -= 8; + pmpcfg = __RV_CSR_READ(CSR_PMPCFG2); + } + idx = idx << 3; + return (uint8_t)((pmpcfg>>idx) & 0xFF); +#else + // TODO Add RV128 Handling + return 0; +#endif +} + +/** + * \brief Set 8bit PMPxCFG by pmp entry index + * \details Set the given pmpxcfg value to the PMPxCFG Register. + * \param [in] idx PMPx region index(0-15) + * \param [in] pmpxcfg PMPxCFG register value to set + */ +__STATIC_INLINE void __set_PMPxCFG(uint32_t idx, uint8_t pmpxcfg) +{ + rv_csr_t pmpcfgx = 0; + if (idx >= __PMP_ENTRY_NUM) return; + +#if __RISCV_XLEN == 32 + if (idx < 4) { + pmpcfgx = __RV_CSR_READ(CSR_PMPCFG0); + idx = idx << 3; + pmpcfgx = (pmpcfgx & ~(0xFFUL << idx)) | ((rv_csr_t)pmpxcfg << idx); + __RV_CSR_WRITE(CSR_PMPCFG0, pmpcfgx); + } else if ((idx >=4) && (idx < 8)) { + idx -= 4; + pmpcfgx = __RV_CSR_READ(CSR_PMPCFG1); + idx = idx << 3; + pmpcfgx = (pmpcfgx & ~(0xFFUL << idx)) | ((rv_csr_t)pmpxcfg << idx); + __RV_CSR_WRITE(CSR_PMPCFG1, pmpcfgx); + } else if ((idx >=8) && (idx < 12)) { + idx -= 8; + pmpcfgx = __RV_CSR_READ(CSR_PMPCFG2); + idx = idx << 3; + pmpcfgx = (pmpcfgx & ~(0xFFUL << idx)) | ((rv_csr_t)pmpxcfg << idx); + __RV_CSR_WRITE(CSR_PMPCFG2, pmpcfgx); + } else { + idx -= 12; + pmpcfgx = __RV_CSR_READ(CSR_PMPCFG3); + idx = idx << 3; + pmpcfgx = (pmpcfgx & ~(0xFFUL << idx)) | ((rv_csr_t)pmpxcfg << idx); + __RV_CSR_WRITE(CSR_PMPCFG3, pmpcfgx); + } +#elif __RISCV_XLEN == 64 + if (idx < 8) { + pmpcfgx = __RV_CSR_READ(CSR_PMPCFG0); + idx = idx << 3; + pmpcfgx = (pmpcfgx & ~(0xFFULL << idx)) | ((rv_csr_t)pmpxcfg << idx); + __RV_CSR_WRITE(CSR_PMPCFG0, pmpcfgx); + } else { + idx -= 8; + pmpcfgx = __RV_CSR_READ(CSR_PMPCFG2); + idx = idx << 3; + pmpcfgx = (pmpcfgx & ~(0xFFULL << idx)) | ((rv_csr_t)pmpxcfg << idx); + __RV_CSR_WRITE(CSR_PMPCFG2, pmpcfgx); + } +#else + // TODO Add RV128 Handling +#endif +} + +/** + * \brief Get PMPCFGx Register by index + * \details Return the content of the PMPCFGx Register. + * \param [in] idx PMPCFG CSR index(0-3) + * \return PMPCFGx Register value + * \remark + * - For RV64, only idx = 0 and idx = 2 is allowed. + * pmpcfg0 and pmpcfg2 hold the configurations + * for the 16 PMP entries, pmpcfg1 and pmpcfg3 are illegal + * - For RV32, pmpcfg0–pmpcfg3, hold the configurations + * pmp0cfg–pmp15cfg for the 16 PMP entries + */ +__STATIC_INLINE rv_csr_t __get_PMPCFGx(uint32_t idx) +{ + switch (idx) { + case 0: return __RV_CSR_READ(CSR_PMPCFG0); + case 1: return __RV_CSR_READ(CSR_PMPCFG1); + case 2: return __RV_CSR_READ(CSR_PMPCFG2); + case 3: return __RV_CSR_READ(CSR_PMPCFG3); + default: return 0; + } +} + +/** + * \brief Set PMPCFGx by index + * \details Write the given value to the PMPCFGx Register. + * \param [in] idx PMPCFG CSR index(0-3) + * \param [in] pmpcfg PMPCFGx Register value to set + * \remark + * - For RV64, only idx = 0 and idx = 2 is allowed. + * pmpcfg0 and pmpcfg2 hold the configurations + * for the 16 PMP entries, pmpcfg1 and pmpcfg3 are illegal + * - For RV32, pmpcfg0–pmpcfg3, hold the configurations + * pmp0cfg–pmp15cfg for the 16 PMP entries + */ +__STATIC_INLINE void __set_PMPCFGx(uint32_t idx, rv_csr_t pmpcfg) +{ + switch (idx) { + case 0: __RV_CSR_WRITE(CSR_PMPCFG0, pmpcfg); break; + case 1: __RV_CSR_WRITE(CSR_PMPCFG1, pmpcfg); break; + case 2: __RV_CSR_WRITE(CSR_PMPCFG2, pmpcfg); break; + case 3: __RV_CSR_WRITE(CSR_PMPCFG3, pmpcfg); break; + default: return; + } +} + +/** + * \brief Get PMPADDRx Register by index + * \details Return the content of the PMPADDRx Register. + * \param [in] idx PMP region index(0-15) + * \return PMPADDRx Register value + */ +__STATIC_INLINE rv_csr_t __get_PMPADDRx(uint32_t idx) +{ + switch (idx) { + case 0: return __RV_CSR_READ(CSR_PMPADDR0); + case 1: return __RV_CSR_READ(CSR_PMPADDR1); + case 2: return __RV_CSR_READ(CSR_PMPADDR2); + case 3: return __RV_CSR_READ(CSR_PMPADDR3); + case 4: return __RV_CSR_READ(CSR_PMPADDR4); + case 5: return __RV_CSR_READ(CSR_PMPADDR5); + case 6: return __RV_CSR_READ(CSR_PMPADDR6); + case 7: return __RV_CSR_READ(CSR_PMPADDR7); + case 8: return __RV_CSR_READ(CSR_PMPADDR8); + case 9: return __RV_CSR_READ(CSR_PMPADDR9); + case 10: return __RV_CSR_READ(CSR_PMPADDR10); + case 11: return __RV_CSR_READ(CSR_PMPADDR11); + case 12: return __RV_CSR_READ(CSR_PMPADDR12); + case 13: return __RV_CSR_READ(CSR_PMPADDR13); + case 14: return __RV_CSR_READ(CSR_PMPADDR14); + case 15: return __RV_CSR_READ(CSR_PMPADDR15); + default: return 0; + } +} + +/** + * \brief Set PMPADDRx by index + * \details Write the given value to the PMPADDRx Register. + * \param [in] idx PMP region index(0-15) + * \param [in] pmpaddr PMPADDRx Register value to set + */ +__STATIC_INLINE void __set_PMPADDRx(uint32_t idx, rv_csr_t pmpaddr) +{ + switch (idx) { + case 0: __RV_CSR_WRITE(CSR_PMPADDR0, pmpaddr); break; + case 1: __RV_CSR_WRITE(CSR_PMPADDR1, pmpaddr); break; + case 2: __RV_CSR_WRITE(CSR_PMPADDR2, pmpaddr); break; + case 3: __RV_CSR_WRITE(CSR_PMPADDR3, pmpaddr); break; + case 4: __RV_CSR_WRITE(CSR_PMPADDR4, pmpaddr); break; + case 5: __RV_CSR_WRITE(CSR_PMPADDR5, pmpaddr); break; + case 6: __RV_CSR_WRITE(CSR_PMPADDR6, pmpaddr); break; + case 7: __RV_CSR_WRITE(CSR_PMPADDR7, pmpaddr); break; + case 8: __RV_CSR_WRITE(CSR_PMPADDR8, pmpaddr); break; + case 9: __RV_CSR_WRITE(CSR_PMPADDR9, pmpaddr); break; + case 10: __RV_CSR_WRITE(CSR_PMPADDR10, pmpaddr); break; + case 11: __RV_CSR_WRITE(CSR_PMPADDR11, pmpaddr); break; + case 12: __RV_CSR_WRITE(CSR_PMPADDR12, pmpaddr); break; + case 13: __RV_CSR_WRITE(CSR_PMPADDR13, pmpaddr); break; + case 14: __RV_CSR_WRITE(CSR_PMPADDR14, pmpaddr); break; + case 15: __RV_CSR_WRITE(CSR_PMPADDR15, pmpaddr); break; + default: return; + } +} +/** @} */ /* End of Doxygen Group NMSIS_Core_PMP_Functions */ +#endif /* defined(__PMP_PRESENT) && (__PMP_PRESENT == 1) */ + +#ifdef __cplusplus +} +#endif +#endif /** __CORE_FEATURE_PMP_H__ */ diff --git a/external/arch/riscv/nuclei/NMSIS/Core/Include/core_feature_timer.h b/external/arch/riscv/nuclei/NMSIS/Core/Include/core_feature_timer.h new file mode 100644 index 000000000..6e9b7af39 --- /dev/null +++ b/external/arch/riscv/nuclei/NMSIS/Core/Include/core_feature_timer.h @@ -0,0 +1,364 @@ +/* + * Copyright (c) 2019 Nuclei Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __CORE_FEATURE_TIMER_H__ +#define __CORE_FEATURE_TIMER_H__ +/*! + * @file core_feature_timer.h + * @brief System Timer feature API header file for Nuclei N/NX Core + */ +/* + * System Timer Feature Configuration Macro: + * 1. __SYSTIMER_PRESENT: Define whether Private System Timer is present or not. + * * 0: Not present + * * 1: Present + * 2. __SYSTIMER_BASEADDR: Define the base address of the System Timer. + */ +#ifdef __cplusplus + extern "C" { +#endif + +#if defined(__SYSTIMER_PRESENT) && (__SYSTIMER_PRESENT == 1) +/** + * \defgroup NMSIS_Core_SysTimer_Registers Register Define and Type Definitions Of System Timer + * \ingroup NMSIS_Core_Registers + * \brief Type definitions and defines for system timer registers. + * + * @{ + */ +/** + * \brief Structure type to access the System Timer (SysTimer). + * \details + * Structure definition to access the system timer(SysTimer). + * \remarks + * - MSFTRST register is introduced in Nuclei N Core version 1.3(\ref __NUCLEI_N_REV >= 0x0103) + * - MSTOP register is renamed to MTIMECTL register in Nuclei N Core version 1.4(\ref __NUCLEI_N_REV >= 0x0104) + * - CMPCLREN and CLKSRC bit in MTIMECTL register is introduced in Nuclei N Core version 1.4(\ref __NUCLEI_N_REV >= 0x0104) + */ +typedef struct { + __IOM uint64_t MTIMER; /*!< Offset: 0x000 (R/W) System Timer current value 64bits Register */ + __IOM uint64_t MTIMERCMP; /*!< Offset: 0x008 (R/W) System Timer compare Value 64bits Register */ + __IOM uint32_t RESERVED0[0x3F8]; /*!< Offset: 0x010 - 0xFEC Reserved */ + __IOM uint32_t MSFTRST; /*!< Offset: 0xFF0 (R/W) System Timer Software Core Reset Register */ + __IOM uint32_t RESERVED1; /*!< Offset: 0xFF4 Reserved */ + __IOM uint32_t MTIMECTL; /*!< Offset: 0xFF8 (R/W) System Timer Control Register, previously MSTOP register */ + __IOM uint32_t MSIP; /*!< Offset: 0xFFC (R/W) System Timer SW interrupt Register */ +} SysTimer_Type; + +/* Timer Control / Status Register Definitions */ +#define SysTimer_MTIMECTL_TIMESTOP_Pos 0U /*!< SysTick Timer MTIMECTL: TIMESTOP bit Position */ +#define SysTimer_MTIMECTL_TIMESTOP_Msk (1UL << SysTimer_MTIMECTL_TIMESTOP_Pos) /*!< SysTick Timer MTIMECTL: TIMESTOP Mask */ +#define SysTimer_MTIMECTL_CMPCLREN_Pos 1U /*!< SysTick Timer MTIMECTL: CMPCLREN bit Position */ +#define SysTimer_MTIMECTL_CMPCLREN_Msk (1UL << SysTimer_MTIMECTL_CMPCLREN_Pos) /*!< SysTick Timer MTIMECTL: CMPCLREN Mask */ +#define SysTimer_MTIMECTL_CLKSRC_Pos 2U /*!< SysTick Timer MTIMECTL: CLKSRC bit Position */ +#define SysTimer_MTIMECTL_CLKSRC_Msk (1UL << SysTimer_MTIMECTL_CLKSRC_Pos) /*!< SysTick Timer MTIMECTL: CLKSRC Mask */ + +#define SysTimer_MSIP_MSIP_Pos 0U /*!< SysTick Timer MSIP: MSIP bit Position */ +#define SysTimer_MSIP_MSIP_Msk (1UL << SysTimer_MSIP_MSIP_Pos) /*!< SysTick Timer MSIP: MSIP Mask */ + +#define SysTimer_MTIMER_Msk (0xFFFFFFFFFFFFFFFFULL) /*!< SysTick Timer MTIMER value Mask */ +#define SysTimer_MTIMERCMP_Msk (0xFFFFFFFFFFFFFFFFULL) /*!< SysTick Timer MTIMERCMP value Mask */ +#define SysTimer_MTIMECTL_Msk (0xFFFFFFFFUL) /*!< SysTick Timer MTIMECTL/MSTOP value Mask */ +#define SysTimer_MSIP_Msk (0xFFFFFFFFUL) /*!< SysTick Timer MSIP value Mask */ +#define SysTimer_MSFTRST_Msk (0xFFFFFFFFUL) /*!< SysTick Timer MSFTRST value Mask */ + +#define SysTimer_MSFRST_KEY (0x80000A5FUL) /*!< SysTick Timer Software Reset Request Key */ + +#ifndef __SYSTIMER_BASEADDR +/* Base address of SYSTIMER(__SYSTIMER_BASEADDR) should be defined in */ +#error "__SYSTIMER_BASEADDR is not defined, please check!" +#endif +/* System Timer Memory mapping of Device */ +#define SysTimer_BASE __SYSTIMER_BASEADDR /*!< SysTick Base Address */ +#define SysTimer ((SysTimer_Type *) SysTimer_BASE) /*!< SysTick configuration struct */ +/** @} */ /* end of group NMSIS_Core_SysTimer_Registers */ + +/* ################################## SysTimer function ############################################ */ +/** + * \defgroup NMSIS_Core_SysTimer SysTimer Functions + * \brief Functions that configure the Core System Timer. + * @{ + */ +/** + * \brief Set system timer load value + * \details + * This function set the system timer load value in MTIMER register. + * \param [in] value value to set system timer MTIMER register. + * \remarks + * - Load value is 64bits wide. + * - \ref SysTimer_GetLoadValue + */ +__STATIC_FORCEINLINE void SysTimer_SetLoadValue(uint64_t value) +{ + SysTimer->MTIMER = value; +} + +/** + * \brief Get system timer load value + * \details + * This function get the system timer current value in MTIMER register. + * \return current value(64bit) of system timer MTIMER register. + * \remarks + * - Load value is 64bits wide. + * - \ref SysTimer_SetLoadValue + */ +__STATIC_FORCEINLINE uint64_t SysTimer_GetLoadValue(void) +{ + return SysTimer->MTIMER; +} + +/** + * \brief Set system timer compare value + * \details + * This function set the system Timer compare value in MTIMERCMP register. + * \param [in] value compare value to set system timer MTIMERCMP register. + * \remarks + * - Compare value is 64bits wide. + * - If compare value is larger than current value timer interrupt generate. + * - Modify the load value or compare value less to clear the interrupt. + * - \ref SysTimer_GetCompareValue + */ +__STATIC_FORCEINLINE void SysTimer_SetCompareValue(uint64_t value) +{ + SysTimer->MTIMERCMP = value; +} + +/** + * \brief Get system timer compare value + * \details + * This function get the system timer compare value in MTIMERCMP register. + * \return compare value of system timer MTIMERCMP register. + * \remarks + * - Compare value is 64bits wide. + * - \ref SysTimer_SetCompareValue + */ +__STATIC_FORCEINLINE uint64_t SysTimer_GetCompareValue(void) +{ + return SysTimer->MTIMERCMP; +} + +/** + * \brief Enable system timer counter running + * \details + * Enable system timer counter running by clear + * TIMESTOP bit in MTIMECTL register. + */ +__STATIC_FORCEINLINE void SysTimer_Start(void) +{ + SysTimer->MTIMECTL &= ~(SysTimer_MTIMECTL_TIMESTOP_Msk); +} + +/** + * \brief Stop system timer counter running + * \details + * Stop system timer counter running by set + * TIMESTOP bit in MTIMECTL register. + */ +__STATIC_FORCEINLINE void SysTimer_Stop(void) +{ + SysTimer->MTIMECTL |= SysTimer_MTIMECTL_TIMESTOP_Msk; +} + +/** + * \brief Set system timer control value + * \details + * This function set the system timer MTIMECTL register value. + * \param [in] mctl value to set MTIMECTL register + * \remarks + * - Bit TIMESTOP is used to start and stop timer. + * Clear TIMESTOP bit to 0 to start timer, otherwise to stop timer. + * - Bit CMPCLREN is used to enable auto MTIMER clear to zero when MTIMER >= MTIMERCMP. + * Clear CMPCLREN bit to 0 to stop auto clear MTIMER feature, otherwise to enable it. + * - Bit CLKSRC is used to select timer clock source. + * Clear CLKSRC bit to 0 to use *mtime_toggle_a*, otherwise use *core_clk_aon* + * - \ref SysTimer_GetControlValue + */ +__STATIC_FORCEINLINE void SysTimer_SetControlValue(uint32_t mctl) +{ + SysTimer->MTIMECTL = (mctl & SysTimer_MTIMECTL_Msk); +} + +/** + * \brief Get system timer control value + * \details + * This function get the system timer MTIMECTL register value. + * \return MTIMECTL register value + * \remarks + * - \ref SysTimer_SetControlValue + */ +__STATIC_FORCEINLINE uint32_t SysTimer_GetControlValue(void) +{ + return (SysTimer->MTIMECTL & SysTimer_MTIMECTL_Msk); +} + +/** + * \brief Trigger or set software interrupt via system timer + * \details + * This function set the system timer MSIP bit in MSIP register. + * \remarks + * - Set system timer MSIP bit and generate a SW interrupt. + * - \ref SysTimer_ClearSWIRQ + * - \ref SysTimer_GetMsipValue + */ +__STATIC_FORCEINLINE void SysTimer_SetSWIRQ(void) +{ + SysTimer->MSIP |= SysTimer_MSIP_MSIP_Msk; +} + +/** + * \brief Clear system timer software interrupt pending request + * \details + * This function clear the system timer MSIP bit in MSIP register. + * \remarks + * - Clear system timer MSIP bit in MSIP register to clear the software interrupt pending. + * - \ref SysTimer_SetSWIRQ + * - \ref SysTimer_GetMsipValue + */ +__STATIC_FORCEINLINE void SysTimer_ClearSWIRQ(void) +{ + SysTimer->MSIP &= ~SysTimer_MSIP_MSIP_Msk; +} + +/** + * \brief Get system timer MSIP register value + * \details + * This function get the system timer MSIP register value. + * \return Value of Timer MSIP register. + * \remarks + * - Bit0 is SW interrupt flag. + * Bit0 is 1 then SW interrupt set. Bit0 is 0 then SW interrupt clear. + * - \ref SysTimer_SetSWIRQ + * - \ref SysTimer_ClearSWIRQ + */ +__STATIC_FORCEINLINE uint32_t SysTimer_GetMsipValue(void) +{ + return (uint32_t)(SysTimer->MSIP & SysTimer_MSIP_Msk); +} + +/** + * \brief Set system timer MSIP register value + * \details + * This function set the system timer MSIP register value. + * \param [in] msip value to set MSIP register + */ +__STATIC_FORCEINLINE void SysTimer_SetMsipValue(uint32_t msip) +{ + SysTimer->MSIP = (msip & SysTimer_MSIP_Msk); +} + +/** + * \brief Do software reset request + * \details + * This function will do software reset request through MTIMER + * - Software need to write \ref SysTimer_MSFRST_KEY to generate software reset request + * - The software request flag can be cleared by reset operation to clear + * \remarks + * - The software reset is sent to SoC, SoC need to generate reset signal and send back to Core + * - This function will not return, it will do while(1) to wait the Core reset happened + */ +__STATIC_FORCEINLINE void SysTimer_SoftwareReset(void) +{ + SysTimer->MSFTRST = SysTimer_MSFRST_KEY; + while(1); +} + +#if defined (__Vendor_SysTickConfig) && (__Vendor_SysTickConfig == 0U) && defined(__ECLIC_PRESENT) && (__ECLIC_PRESENT == 1) +/** + * \brief System Tick Configuration + * \details Initializes the System Timer and its non-vector interrupt, and starts the System Tick Timer. + * + * In our default implementation, the timer counter will be set to zero, and it will start a timer compare non-vector interrupt + * when it matchs the ticks user set, during the timer interrupt user should reload the system tick using \ref SysTick_Reload function + * or similar function written by user, so it can produce period timer interrupt. + * \param [in] ticks Number of ticks between two interrupts. + * \return 0 Function succeeded. + * \return 1 Function failed. + * \remarks + * - For \ref __NUCLEI_N_REV >= 0x0104, the CMPCLREN bit in MTIMECTL is introduced, + * but we assume that the CMPCLREN bit is set to 0, so MTIMER register will not be + * auto cleared to 0 when MTIMER >= MTIMERCMP. + * - When the variable \ref __Vendor_SysTickConfig is set to 1, then the + * function \ref SysTick_Config is not included. + * - In this case, the file .h must contain a vendor-specific implementation + * of this function. + * - If user need this function to start a period timer interrupt, then in timer interrupt handler + * routine code, user should call \ref SysTick_Reload with ticks to reload the timer. + * - This function only available when __SYSTIMER_PRESENT == 1 and __ECLIC_PRESENT == 1 and __Vendor_SysTickConfig == 0 + * \sa + * - \ref SysTimer_SetCompareValue; SysTimer_SetLoadValue + */ +__STATIC_INLINE uint32_t SysTick_Config(uint64_t ticks) +{ + SysTimer_SetLoadValue(0); + SysTimer_SetCompareValue(ticks); + ECLIC_SetShvIRQ(SysTimer_IRQn, ECLIC_NON_VECTOR_INTERRUPT); + ECLIC_SetLevelIRQ(SysTimer_IRQn, 0); + ECLIC_EnableIRQ(SysTimer_IRQn); + return (0UL); +} + +/** + * \brief System Tick Reload + * \details Reload the System Timer Tick when the MTIMECMP reached TIME value + * + * \param [in] ticks Number of ticks between two interrupts. + * \return 0 Function succeeded. + * \return 1 Function failed. + * \remarks + * - For \ref __NUCLEI_N_REV >= 0x0104, the CMPCLREN bit in MTIMECTL is introduced, + * but for this \ref SysTick_Config function, we assume this CMPCLREN bit is set to 0, + * so in interrupt handler function, user still need to set the MTIMERCMP or MTIMER to reload + * the system tick, if vendor want to use this timer's auto clear feature, they can define + * \ref __Vendor_SysTickConfig to 1, and implement \ref SysTick_Config and \ref SysTick_Reload functions. + * - When the variable \ref __Vendor_SysTickConfig is set to 1, then the + * function \ref SysTick_Reload is not included. + * - In this case, the file .h must contain a vendor-specific implementation + * of this function. + * - This function only available when __SYSTIMER_PRESENT == 1 and __ECLIC_PRESENT == 1 and __Vendor_SysTickConfig == 0 + * - Since the MTIMERCMP value might overflow, if overflowed, MTIMER will be set to 0, and MTIMERCMP set to ticks + * \sa + * - \ref SysTimer_SetCompareValue + * - \ref SysTimer_SetLoadValue + */ +__STATIC_FORCEINLINE uint32_t SysTick_Reload(uint64_t ticks) +{ + uint64_t cur_ticks = SysTimer->MTIMER; + uint64_t reload_ticks = ticks + cur_ticks; + + if (__USUALLY(reload_ticks > cur_ticks)) { + SysTimer->MTIMERCMP = reload_ticks; + } else { + /* When added the ticks value, then the MTIMERCMP < TIMER, + * which means the MTIMERCMP is overflowed, + * so we need to reset the counter to zero */ + SysTimer->MTIMER = 0; + SysTimer->MTIMERCMP = ticks; + } + + return (0UL); +} + +#endif /* defined(__Vendor_SysTickConfig) && (__Vendor_SysTickConfig == 0U) */ +/** @} */ /* End of Doxygen Group NMSIS_Core_SysTimer */ + +#endif /* defined(__SYSTIMER_PRESENT) && (__SYSTIMER_PRESENT == 1) */ + +#ifdef __cplusplus +} +#endif +#endif /** __CORE_FEATURE_TIMER_H__ */ + diff --git a/external/arch/riscv/nuclei/NMSIS/Core/Include/nmsis_compiler.h b/external/arch/riscv/nuclei/NMSIS/Core/Include/nmsis_compiler.h new file mode 100644 index 000000000..c5278db1b --- /dev/null +++ b/external/arch/riscv/nuclei/NMSIS/Core/Include/nmsis_compiler.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2019 Nuclei Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NMSIS_COMPILER_H +#define __NMSIS_COMPILER_H + +#include + +/*! + * @file nmsis_compiler.h + * @brief NMSIS compiler generic header file + */ +#if defined ( __GNUC__ ) + /** GNU GCC Compiler */ + #include "nmsis_gcc.h" +#else + #error Unknown compiler. +#endif + + +#endif /* __NMSIS_COMPILER_H */ + diff --git a/external/arch/riscv/nuclei/NMSIS/Core/Include/nmsis_core.h b/external/arch/riscv/nuclei/NMSIS/Core/Include/nmsis_core.h new file mode 100644 index 000000000..fa7821da1 --- /dev/null +++ b/external/arch/riscv/nuclei/NMSIS/Core/Include/nmsis_core.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2009-2019 Arm Limited. All rights reserved. + * -- Adaptable modifications made for Nuclei Processors. -- + * Copyright (c) 2019 Nuclei Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __NMSIS_CORE_H__ +#define __NMSIS_CORE_H__ + +#include + +#ifdef __cplusplus + extern "C" { +#endif + +#include "nmsis_version.h" + +/** + * \ingroup NMSIS_Core_VersionControl + * @{ + */ +/* The following enum __NUCLEI_N_REV/__NUCLEI_NX_REV definition in this file + * is only used for doxygen documentation generation, + * The .h is the real file to define it by vendor + */ +#if defined(__ONLY_FOR_DOXYGEN_DOCUMENT_GENERATION__) +/** + * \brief Nuclei N class core revision number + * \details + * Reversion number format: [15:8] revision number, [7:0] patch number + * \attention + * This define is exclusive with \ref __NUCLEI_NX_REV + */ +#define __NUCLEI_N_REV (0x0104) +/** + * \brief Nuclei NX class core revision number + * \details + * Reversion number format: [15:8] revision number, [7:0] patch number + * \attention + * This define is exclusive with \ref __NUCLEI_N_REV + */ +#define __NUCLEI_NX_REV (0x0100) +#endif /* __ONLY_FOR_DOXYGEN_DOCUMENT_GENERATION__ */ +/** @} */ /* End of Group NMSIS_Core_VersionControl */ + +#include "nmsis_compiler.h" /* NMSIS compiler specific defines */ + +/* === Include Nuclei Core Related Headers === */ +/* Include core base feature header file */ +#include "core_feature_base.h" + +#ifndef __NMSIS_GENERIC +/* Include core eclic feature header file */ +#include "core_feature_eclic.h" +/* Include core systimer feature header file */ +#include "core_feature_timer.h" +#endif + +/* Include core fpu feature header file */ +#include "core_feature_fpu.h" +/* Include core dsp feature header file */ +#include "core_feature_dsp.h" +/* Include core pmp feature header file */ +#include "core_feature_pmp.h" +/* Include core cache feature header file */ +#include "core_feature_cache.h" + +/* Include compatiable functions header file */ +#include "core_compatiable.h" + +#ifdef __cplusplus +} +#endif +#endif /* __NMSIS_CORE_H__ */ diff --git a/external/arch/riscv/nuclei/NMSIS/Core/Include/nmsis_gcc.h b/external/arch/riscv/nuclei/NMSIS/Core/Include/nmsis_gcc.h new file mode 100644 index 000000000..9f7eb9d26 --- /dev/null +++ b/external/arch/riscv/nuclei/NMSIS/Core/Include/nmsis_gcc.h @@ -0,0 +1,265 @@ +/* + * Copyright (c) 2019 Nuclei Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NMSIS_GCC_H__ +#define __NMSIS_GCC_H__ +/*! + * @file nmsis_gcc.h + * @brief NMSIS compiler GCC header file + */ +#include +#include "riscv_encoding.h" + +#ifdef __cplusplus + extern "C" { +#endif + +/* ######################### Startup and Lowlevel Init ######################## */ +/** + * \defgroup NMSIS_Core_CompilerControl Compiler Control + * \ingroup NMSIS_Core + * \brief Compiler agnostic \#define symbols for generic c/c++ source code + * \details + * + * The NMSIS-Core provides the header file nmsis_compiler.h with consistent \#define symbols for generate C or C++ source files that should be compiler agnostic. + * Each NMSIS compliant compiler should support the functionality described in this section. + * + * The header file nmsis_compiler.h is also included by each Device Header File so that these definitions are available. + * @{ + */ +/* ignore some GCC warnings */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wsign-conversion" +#pragma GCC diagnostic ignored "-Wconversion" +#pragma GCC diagnostic ignored "-Wunused-parameter" + +/* Fallback for __has_builtin */ +#ifndef __has_builtin + #define __has_builtin(x) (0) +#endif + +/* NMSIS compiler specific defines */ +/** \brief Pass information from the compiler to the assembler. */ +#ifndef __ASM + #define __ASM __asm +#endif + +/** \brief Recommend that function should be inlined by the compiler. */ +#ifndef __INLINE + #define __INLINE inline +#endif + +/** \brief Define a static function that may be inlined by the compiler. */ +#ifndef __STATIC_INLINE + #define __STATIC_INLINE static inline +#endif + +/** \brief Define a static function that should be always inlined by the compiler. */ +#ifndef __STATIC_FORCEINLINE + #define __STATIC_FORCEINLINE __attribute__((always_inline)) static inline +#endif + +/** \brief Inform the compiler that a function does not return. */ +#ifndef __NO_RETURN + #define __NO_RETURN __attribute__((__noreturn__)) +#endif + +/** \brief Inform that a variable shall be retained in executable image. */ +#ifndef __USED + #define __USED __attribute__((used)) +#endif + +/** \brief restrict pointer qualifier to enable additional optimizations. */ +#ifndef __WEAK + #define __WEAK __attribute__((weak)) +#endif + +/** \brief specified the vector size of the variable, measured in bytes */ +#ifndef __VECTOR_SIZE + #define __VECTOR_SIZE(x) __attribute__((vector_size(x))) +#endif + +/** \brief Request smallest possible alignment. */ +#ifndef __PACKED + #define __PACKED __attribute__((packed, aligned(1))) +#endif + +/** \brief Request smallest possible alignment for a structure. */ +#ifndef __PACKED_STRUCT + #define __PACKED_STRUCT struct __attribute__((packed, aligned(1))) +#endif + +/** \brief Request smallest possible alignment for a union. */ +#ifndef __PACKED_UNION + #define __PACKED_UNION union __attribute__((packed, aligned(1))) +#endif + +#ifndef __UNALIGNED_UINT16_WRITE + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wpacked" + #pragma GCC diagnostic ignored "-Wattributes" + /** \brief Packed struct for unaligned uint16_t write access */ + __PACKED_STRUCT T_UINT16_WRITE { + uint16_t v; + }; + #pragma GCC diagnostic pop + /** \brief Pointer for unaligned write of a uint16_t variable. */ + #define __UNALIGNED_UINT16_WRITE(addr, val) (void)((((struct T_UINT16_WRITE *)(void *)(addr))->v) = (val)) +#endif + +#ifndef __UNALIGNED_UINT16_READ + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wpacked" + #pragma GCC diagnostic ignored "-Wattributes" + /** \brief Packed struct for unaligned uint16_t read access */ + __PACKED_STRUCT T_UINT16_READ { + uint16_t v; + }; + #pragma GCC diagnostic pop + /** \brief Pointer for unaligned read of a uint16_t variable. */ + #define __UNALIGNED_UINT16_READ(addr) (((const struct T_UINT16_READ *)(const void *)(addr))->v) +#endif + +#ifndef __UNALIGNED_UINT32_WRITE + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wpacked" + #pragma GCC diagnostic ignored "-Wattributes" + /** \brief Packed struct for unaligned uint32_t write access */ + __PACKED_STRUCT T_UINT32_WRITE { + uint32_t v; + }; + #pragma GCC diagnostic pop + /** \brief Pointer for unaligned write of a uint32_t variable. */ + #define __UNALIGNED_UINT32_WRITE(addr, val) (void)((((struct T_UINT32_WRITE *)(void *)(addr))->v) = (val)) +#endif + +#ifndef __UNALIGNED_UINT32_READ + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wpacked" + #pragma GCC diagnostic ignored "-Wattributes" + /** \brief Packed struct for unaligned uint32_t read access */ + __PACKED_STRUCT T_UINT32_READ { + uint32_t v; + }; + #pragma GCC diagnostic pop + /** \brief Pointer for unaligned read of a uint32_t variable. */ + #define __UNALIGNED_UINT32_READ(addr) (((const struct T_UINT32_READ *)(const void *)(addr))->v) +#endif + +/** \brief Minimum `x` bytes alignment for a variable. */ +#ifndef __ALIGNED + #define __ALIGNED(x) __attribute__((aligned(x))) +#endif + +/** \brief restrict pointer qualifier to enable additional optimizations. */ +#ifndef __RESTRICT + #define __RESTRICT __restrict +#endif + +/** \brief Barrier to prevent compiler from reordering instructions. */ +#ifndef __COMPILER_BARRIER + #define __COMPILER_BARRIER() __ASM volatile("":::"memory") +#endif + +/** \brief provide the compiler with branch prediction information, the branch is usually true */ +#ifndef __USUALLY + #define __USUALLY(exp) __builtin_expect((exp), 1) +#endif + +/** \brief provide the compiler with branch prediction information, the branch is rarely true */ +#ifndef __RARELY + #define __RARELY(exp) __builtin_expect((exp), 0) +#endif + +/** \brief Use this attribute to indicate that the specified function is an interrupt handler. */ +#ifndef __INTERRUPT + #define __INTERRUPT __attribute__((interrupt)) +#endif + +/** @} */ /* End of Doxygen Group NMSIS_Core_CompilerControl */ + +/* IO definitions (access restrictions to peripheral registers) */ +/** + * \defgroup NMSIS_Core_PeriphAccess Peripheral Access + * \brief Naming conventions and optional features for accessing peripherals. + * + * The section below describes the naming conventions, requirements, and optional features + * for accessing device specific peripherals. + * Most of the rules also apply to the core peripherals. + * + * The **Device Header File ** contains typically these definition + * and also includes the core specific header files. + * + * @{ + */ +/** \brief Defines 'read only' permissions */ +#ifdef __cplusplus + #define __I volatile +#else + #define __I volatile const +#endif +/** \brief Defines 'write only' permissions */ +#define __O volatile +/** \brief Defines 'read / write' permissions */ +#define __IO volatile + +/* following defines should be used for structure members */ +/** \brief Defines 'read only' structure member permissions */ +#define __IM volatile const +/** \brief Defines 'write only' structure member permissions */ +#define __OM volatile +/** \brief Defines 'read/write' structure member permissions */ +#define __IOM volatile + +/** + * \brief Mask and shift a bit field value for use in a register bit range. + * \details The macro \ref _VAL2FLD uses the #define's _Pos and _Msk of the related bit + * field to shift bit-field values for assigning to a register. + * + * **Example**: + * \code + * ECLIC->CFG = _VAL2FLD(CLIC_CLICCFG_NLBIT, 3); + * \endcode + * \param[in] field Name of the register bit field. + * \param[in] value Value of the bit field. This parameter is interpreted as an uint32_t type. + * \return Masked and shifted value. + */ +#define _VAL2FLD(field, value) (((uint32_t)(value) << field ## _Pos) & field ## _Msk) + +/** + * \brief Mask and shift a register value to extract a bit filed value. + * \details The macro \ref _FLD2VAL uses the #define's _Pos and _Msk of the related bit + * field to extract the value of a bit field from a register. + * + * **Example**: + * \code + * nlbits = _FLD2VAL(CLIC_CLICCFG_NLBIT, ECLIC->CFG); + * \endcode + * \param[in] field Name of the register bit field. + * \param[in] value Value of register. This parameter is interpreted as an uint32_t type. + * \return Masked and shifted bit field value. + */ +#define _FLD2VAL(field, value) (((uint32_t)(value) & field ## _Msk) >> field ## _Pos) + +/** @} */ /* end of group NMSIS_Core_PeriphAccess */ + + +#ifdef __cplusplus +} +#endif +#endif /* __NMSIS_GCC_H__ */ diff --git a/external/arch/riscv/nuclei/NMSIS/Core/Include/nmsis_version.h b/external/arch/riscv/nuclei/NMSIS/Core/Include/nmsis_version.h new file mode 100644 index 000000000..7f05e327e --- /dev/null +++ b/external/arch/riscv/nuclei/NMSIS/Core/Include/nmsis_version.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2019 Nuclei Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __NMSIS_VERSION_H +#define __NMSIS_VERSION_H + +/** + * \defgroup NMSIS_Core_VersionControl Version Control + * \ingroup NMSIS_Core + * \brief Version \#define symbols for NMSIS release specific C/C++ source code + * \details + * + * We followed the [semantic versioning 2.0.0](https://semver.org/) to control NMSIS version. + * The version format is **MAJOR.MINOR.PATCH**, increment the: + * 1. MAJOR version when you make incompatible API changes, + * 2. MINOR version when you add functionality in a backwards compatible manner, and + * 3. PATCH version when you make backwards compatible bug fixes. + * + * The header file `nmsis_version.h` is included by each core header so that these definitions are available. + * + * **Example Usage for NMSIS Version Check**: + * \code + * #if defined(__NMSIS_VERSION) && (__NMSIS_VERSION >= 0x00010105) + * #warning "Yes, we have NMSIS 1.1.5 or later" + * #else + * #error "We need NMSIS 1.1.5 or later!" + * #endif + * \endcode + * + * @{ + */ + +/*! + * \file nmsis_version.h + * \brief NMSIS Version definitions + **/ + +/** + * \brief Represent the NMSIS major version + * \details + * The NMSIS major version can be used to + * differentiate between NMSIS major releases. + * */ +#define __NMSIS_VERSION_MAJOR (1U) + +/** + * \brief Represent the NMSIS minor version + * \details + * The NMSIS minor version can be used to + * query a NMSIS release update including new features. + * + **/ +#define __NMSIS_VERSION_MINOR (0U) + +/** + * \brief Represent the NMSIS patch version + * \details + * The NMSIS patch version can be used to + * show bug fixes in this package. + **/ +#define __NMSIS_VERSION_PATCH (0U) +/** + * \brief Represent the NMSIS Version + * \details + * NMSIS Version format: **MAJOR.MINOR.PATCH** + * * MAJOR: \ref __NMSIS_VERSION_MAJOR, stored in `bits [31:16]` of \ref __NMSIS_VERSION + * * MINOR: \ref __NMSIS_VERSION_MINOR, stored in `bits [15:8]` of \ref __NMSIS_VERSION + * * PATCH: \ref __NMSIS_VERSION_PATCH, stored in `bits [7:0]` of \ref __NMSIS_VERSION + **/ +#define __NMSIS_VERSION ((__NMSIS_VERSION_MAJOR << 16U) | (__NMSIS_VERSION_MINOR << 8) | __NMSIS_VERSION_PATCH) + +/** @} */ /* End of Doxygen Group NMSIS_Core_VersionControl */ +#endif diff --git a/external/arch/riscv/nuclei/NMSIS/Core/Include/riscv_bits.h b/external/arch/riscv/nuclei/NMSIS/Core/Include/riscv_bits.h new file mode 100644 index 000000000..a18c16863 --- /dev/null +++ b/external/arch/riscv/nuclei/NMSIS/Core/Include/riscv_bits.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2019 Nuclei Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __RISCV_BITS_H__ +#define __RISCV_BITS_H__ + +#ifdef __cplusplus + extern "C" { +#endif + +#if __riscv_xlen == 64 +# define SLL32 sllw +# define STORE sd +# define LOAD ld +# define LWU lwu +# define LOG_REGBYTES 3 +#else +# define SLL32 sll +# define STORE sw +# define LOAD lw +# define LWU lw +# define LOG_REGBYTES 2 +#endif /* __riscv_xlen */ + +#define REGBYTES (1 << LOG_REGBYTES) + +#if __riscv_flen == 64 +# define FPSTORE fsd +# define FPLOAD fld +# define LOG_FPREGBYTES 3 +#else +# define FPSTORE fsw +# define FPLOAD flw +# define LOG_FPREGBYTES 2 +#endif /* __riscv_flen */ +#define FPREGBYTES (1 << LOG_FPREGBYTES) + +#define __rv_likely(x) __builtin_expect((x), 1) +#define __rv_unlikely(x) __builtin_expect((x), 0) + +#define __RV_ROUNDUP(a, b) ((((a)-1)/(b)+1)*(b)) +#define __RV_ROUNDDOWN(a, b) ((a)/(b)*(b)) + +#define __RV_MAX(a, b) ((a) > (b) ? (a) : (b)) +#define __RV_MIN(a, b) ((a) < (b) ? (a) : (b)) +#define __RV_CLAMP(a, lo, hi) MIN(MAX(a, lo), hi) + +#define __RV_EXTRACT_FIELD(val, which) (((val) & (which)) / ((which) & ~((which)-1))) +#define __RV_INSERT_FIELD(val, which, fieldval) (((val) & ~(which)) | ((fieldval) * ((which) & ~((which)-1)))) + +#ifdef __ASSEMBLY__ +#define _AC(X,Y) X +#define _AT(T,X) X +#else +#define __AC(X,Y) (X##Y) +#define _AC(X,Y) __AC(X,Y) +#define _AT(T,X) ((T)(X)) +#endif /* __ASSEMBLY__ */ + +#define _UL(x) (_AC(x, UL)) +#define _ULL(x) (_AC(x, ULL)) + +#define _BITUL(x) (_UL(1) << (x)) +#define _BITULL(x) (_ULL(1) << (x)) + +#define UL(x) (_UL(x)) +#define ULL(x) (_ULL(x)) + +#define STR(x) XSTR(x) +#define XSTR(x) #x +#define __STR(s) #s +#define STRINGIFY(s) __STR(s) + +#ifdef __cplusplus +} +#endif + +#endif /** __RISCV_BITS_H__ */ diff --git a/external/arch/riscv/nuclei/NMSIS/Core/Include/riscv_encoding.h b/external/arch/riscv/nuclei/NMSIS/Core/Include/riscv_encoding.h new file mode 100644 index 000000000..899f8bbc6 --- /dev/null +++ b/external/arch/riscv/nuclei/NMSIS/Core/Include/riscv_encoding.h @@ -0,0 +1,617 @@ +/* + * Copyright (c) 2019 Nuclei Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __RISCV_ENCODING_H__ +#define __RISCV_ENCODING_H__ + +#include "riscv_bits.h" +#ifdef __cplusplus + extern "C" { +#endif +/** + * \defgroup NMSIS_Core_CSR_Encoding Core CSR Encodings + * \ingroup NMSIS_Core + * \brief NMSIS Core CSR Encodings + * \details + * + * The following macros are used for CSR encodings + * @{ + */ +#define MSTATUS_UIE 0x00000001 +#define MSTATUS_SIE 0x00000002 +#define MSTATUS_HIE 0x00000004 +#define MSTATUS_MIE 0x00000008 +#define MSTATUS_UPIE 0x00000010 +#define MSTATUS_SPIE 0x00000020 +#define MSTATUS_HPIE 0x00000040 +#define MSTATUS_MPIE 0x00000080 +#define MSTATUS_SPP 0x00000100 +#define MSTATUS_MPP 0x00001800 +#define MSTATUS_FS 0x00006000 +#define MSTATUS_XS 0x00018000 +#define MSTATUS_MPRV 0x00020000 +#define MSTATUS_PUM 0x00040000 +#define MSTATUS_MXR 0x00080000 +#define MSTATUS_VM 0x1F000000 +#define MSTATUS32_SD 0x80000000 +#define MSTATUS64_SD 0x8000000000000000 + +#define MSTATUS_FS_INITIAL 0x00002000 +#define MSTATUS_FS_CLEAN 0x00004000 +#define MSTATUS_FS_DIRTY 0x00006000 + +#define SSTATUS_UIE 0x00000001 +#define SSTATUS_SIE 0x00000002 +#define SSTATUS_UPIE 0x00000010 +#define SSTATUS_SPIE 0x00000020 +#define SSTATUS_SPP 0x00000100 +#define SSTATUS_FS 0x00006000 +#define SSTATUS_XS 0x00018000 +#define SSTATUS_PUM 0x00040000 +#define SSTATUS32_SD 0x80000000 +#define SSTATUS64_SD 0x8000000000000000 + +#define CSR_MCACHE_CTL_IE 0x00000001 +#define CSR_MCACHE_CTL_DE 0x00010000 + +#define DCSR_XDEBUGVER (3U<<30) +#define DCSR_NDRESET (1<<29) +#define DCSR_FULLRESET (1<<28) +#define DCSR_EBREAKM (1<<15) +#define DCSR_EBREAKH (1<<14) +#define DCSR_EBREAKS (1<<13) +#define DCSR_EBREAKU (1<<12) +#define DCSR_STOPCYCLE (1<<10) +#define DCSR_STOPTIME (1<<9) +#define DCSR_CAUSE (7<<6) +#define DCSR_DEBUGINT (1<<5) +#define DCSR_HALT (1<<3) +#define DCSR_STEP (1<<2) +#define DCSR_PRV (3<<0) + +#define DCSR_CAUSE_NONE 0 +#define DCSR_CAUSE_SWBP 1 +#define DCSR_CAUSE_HWBP 2 +#define DCSR_CAUSE_DEBUGINT 3 +#define DCSR_CAUSE_STEP 4 +#define DCSR_CAUSE_HALT 5 + +#define MCONTROL_TYPE(xlen) (0xfULL<<((xlen)-4)) +#define MCONTROL_DMODE(xlen) (1ULL<<((xlen)-5)) +#define MCONTROL_MASKMAX(xlen) (0x3fULL<<((xlen)-11)) + +#define MCONTROL_SELECT (1<<19) +#define MCONTROL_TIMING (1<<18) +#define MCONTROL_ACTION (0x3f<<12) +#define MCONTROL_CHAIN (1<<11) +#define MCONTROL_MATCH (0xf<<7) +#define MCONTROL_M (1<<6) +#define MCONTROL_H (1<<5) +#define MCONTROL_S (1<<4) +#define MCONTROL_U (1<<3) +#define MCONTROL_EXECUTE (1<<2) +#define MCONTROL_STORE (1<<1) +#define MCONTROL_LOAD (1<<0) + +#define MCONTROL_TYPE_NONE 0 +#define MCONTROL_TYPE_MATCH 2 + +#define MCONTROL_ACTION_DEBUG_EXCEPTION 0 +#define MCONTROL_ACTION_DEBUG_MODE 1 +#define MCONTROL_ACTION_TRACE_START 2 +#define MCONTROL_ACTION_TRACE_STOP 3 +#define MCONTROL_ACTION_TRACE_EMIT 4 + +#define MCONTROL_MATCH_EQUAL 0 +#define MCONTROL_MATCH_NAPOT 1 +#define MCONTROL_MATCH_GE 2 +#define MCONTROL_MATCH_LT 3 +#define MCONTROL_MATCH_MASK_LOW 4 +#define MCONTROL_MATCH_MASK_HIGH 5 + +#define MIP_SSIP (1 << IRQ_S_SOFT) +#define MIP_HSIP (1 << IRQ_H_SOFT) +#define MIP_MSIP (1 << IRQ_M_SOFT) +#define MIP_STIP (1 << IRQ_S_TIMER) +#define MIP_HTIP (1 << IRQ_H_TIMER) +#define MIP_MTIP (1 << IRQ_M_TIMER) +#define MIP_SEIP (1 << IRQ_S_EXT) +#define MIP_HEIP (1 << IRQ_H_EXT) +#define MIP_MEIP (1 << IRQ_M_EXT) + +#define MIE_SSIE MIP_SSIP +#define MIE_HSIE MIP_HSIP +#define MIE_MSIE MIP_MSIP +#define MIE_STIE MIP_STIP +#define MIE_HTIE MIP_HTIP +#define MIE_MTIE MIP_MTIP +#define MIE_SEIE MIP_SEIP +#define MIE_HEIE MIP_HEIP +#define MIE_MEIE MIP_MEIP + +/* === Nuclei custom CSR bit mask === */ + +#define WFE_WFE (0x1) +#define TXEVT_TXEVT (0x1) +#define SLEEPVALUE_SLEEPVALUE (0x1) + +#define MCOUNTINHIBIT_IR (1<<2) +#define MCOUNTINHIBIT_CY (1<<0) + +#define MILM_CTL_ILM_BPA (((1ULL<<((__riscv_xlen)-10))-1)<<10) +#define MILM_CTL_ILM_EN (1<<0) + +#define MDLM_CTL_DLM_BPA (((1ULL<<((__riscv_xlen)-10))-1)<<10) +#define MDLM_CTL_DLM_EN (1<<0) + +#define MSUBM_PTYP (0x3<<8) +#define MSUBM_TYP (0x3<<6) + +#define MDCAUSE_MDCAUSE (0x3) + +#define MMISC_CTL_NMI_CAUSE_FFF (1<<9) +#define MMISC_CTL_MISALIGN (1<<6) +#define MMISC_CTL_BPU (1<<3) + +#define MCACHE_CTL_IC_EN (1<<0) +#define MCACHE_CTL_IC_SCPD_MOD (1<<1) +#define MCACHE_CTL_DC_EN (1<<16) + +#define MTVT2_MTVT2EN (1<<0) +#define MTVT2_COMMON_CODE_ENTRY (((1ULL<<((__riscv_xlen)-2))-1)<<2) + +#define MCFG_INFO_TEE (1<<0) +#define MCFG_INFO_ECC (1<<1) +#define MCFG_INFO_CLIC (1<<2) +#define MCFG_INFO_PLIC (1<<3) +#define MCFG_INFO_FIO (1<<4) +#define MCFG_INFO_PPI (1<<5) +#define MCFG_INFO_NICE (1<<6) +#define MCFG_INFO_ILM (1<<7) +#define MCFG_INFO_DLM (1<<8) +#define MCFG_INFO_ICACHE (1<<9) +#define MCFG_INFO_DCACHE (1<<10) + +#define MICFG_IC_SET (0xF<<0) +#define MICFG_IC_WAY (0x7<<4) +#define MICFG_IC_LSIZE (0x7<<7) +#define MICFG_ILM_SIZE (0x1F<<16) +#define MICFG_ILM_XONLY (1<<21) + +#define MDCFG_DC_SET (0xF<<0) +#define MDCFG_DC_WAY (0x7<<4) +#define MDCFG_DC_LSIZE (0x7<<7) +#define MDCFG_DLM_SIZE (0x1F<<16) + +#define MPPICFG_INFO_PPI_SIZE (0x1F<<1) +#define MPPICFG_INFO_PPI_BPA (((1ULL<<((__riscv_xlen)-10))-1)<<10) + +#define MFIOCFG_INFO_FIO_SIZE (0x1F<<1) +#define MFIOCFG_INFO_FIO_BPA (((1ULL<<((__riscv_xlen)-10))-1)<<10) + +#define SIP_SSIP MIP_SSIP +#define SIP_STIP MIP_STIP + +#define PRV_U 0 +#define PRV_S 1 +#define PRV_H 2 +#define PRV_M 3 + +#define VM_MBARE 0 +#define VM_MBB 1 +#define VM_MBBID 2 +#define VM_SV32 8 +#define VM_SV39 9 +#define VM_SV48 10 + +#define IRQ_S_SOFT 1 +#define IRQ_H_SOFT 2 +#define IRQ_M_SOFT 3 +#define IRQ_S_TIMER 5 +#define IRQ_H_TIMER 6 +#define IRQ_M_TIMER 7 +#define IRQ_S_EXT 9 +#define IRQ_H_EXT 10 +#define IRQ_M_EXT 11 +#define IRQ_COP 12 +#define IRQ_HOST 13 + +#define DEFAULT_RSTVEC 0x00001000 +#define DEFAULT_NMIVEC 0x00001004 +#define DEFAULT_MTVEC 0x00001010 +#define CONFIG_STRING_ADDR 0x0000100C +#define EXT_IO_BASE 0x40000000 +#define DRAM_BASE 0x80000000 + +/* === FPU FRM Rounding Mode === */ +/** FPU Round to Nearest, ties to Even*/ +#define FRM_RNDMODE_RNE 0x0 +/** FPU Round Towards Zero */ +#define FRM_RNDMODE_RTZ 0x1 +/** FPU Round Down (towards -inf) */ +#define FRM_RNDMODE_RDN 0x2 +/** FPU Round Up (towards +inf) */ +#define FRM_RNDMODE_RUP 0x3 +/** FPU Round to nearest, ties to Max Magnitude */ +#define FRM_RNDMODE_RMM 0x4 +/** + * In instruction's rm, selects dynamic rounding mode. + * In Rounding Mode register, Invalid */ +#define FRM_RNDMODE_DYN 0x7 + +/* === FPU FFLAGS Accrued Exceptions === */ +/** FPU Inexact */ +#define FFLAGS_AE_NX (1<<0) +/** FPU Underflow */ +#define FFLAGS_AE_UF (1<<1) +/** FPU Overflow */ +#define FFLAGS_AE_OF (1<<2) +/** FPU Divide by Zero */ +#define FFLAGS_AE_DZ (1<<3) +/** FPU Invalid Operation */ +#define FFLAGS_AE_NV (1<<4) + +/** Floating Point Register f0-f31, eg. f0 -> FREG(0) */ +#define FREG(idx) f##idx + + +/* === PMP CFG Bits === */ +#define PMP_R 0x01 +#define PMP_W 0x02 +#define PMP_X 0x04 +#define PMP_A 0x18 +#define PMP_A_TOR 0x08 +#define PMP_A_NA4 0x10 +#define PMP_A_NAPOT 0x18 +#define PMP_L 0x80 + +#define PMP_SHIFT 2 +#define PMP_COUNT 16 + +// page table entry (PTE) fields +#define PTE_V 0x001 // Valid +#define PTE_R 0x002 // Read +#define PTE_W 0x004 // Write +#define PTE_X 0x008 // Execute +#define PTE_U 0x010 // User +#define PTE_G 0x020 // Global +#define PTE_A 0x040 // Accessed +#define PTE_D 0x080 // Dirty +#define PTE_SOFT 0x300 // Reserved for Software + +#define PTE_PPN_SHIFT 10 + +#define PTE_TABLE(PTE) (((PTE) & (PTE_V | PTE_R | PTE_W | PTE_X)) == PTE_V) + +#ifdef __riscv + +#ifdef __riscv64 +# define MSTATUS_SD MSTATUS64_SD +# define SSTATUS_SD SSTATUS64_SD +# define RISCV_PGLEVEL_BITS 9 +#else +# define MSTATUS_SD MSTATUS32_SD +# define SSTATUS_SD SSTATUS32_SD +# define RISCV_PGLEVEL_BITS 10 +#endif /* __riscv64 */ + +#define RISCV_PGSHIFT 12 +#define RISCV_PGSIZE (1 << RISCV_PGSHIFT) + +#endif /* __riscv */ + +#define DOWNLOAD_MODE_FLASHXIP 0 +#define DOWNLOAD_MODE_FLASH 1 +#define DOWNLOAD_MODE_ILM 2 +#define DOWNLOAD_MODE_DDR 3 + +/** + * \defgroup NMSIS_Core_CSR_Registers Core CSR Registers + * \ingroup NMSIS_Core + * \brief NMSIS Core CSR Register Definitions + * \details + * + * The following macros are used for CSR Register Defintions. + * @{ + */ +/* === Standard RISC-V CSR Registers === */ +#define CSR_USTATUS 0x0 +#define CSR_FFLAGS 0x1 +#define CSR_FRM 0x2 +#define CSR_FCSR 0x3 +#define CSR_CYCLE 0xc00 +#define CSR_TIME 0xc01 +#define CSR_INSTRET 0xc02 +#define CSR_HPMCOUNTER3 0xc03 +#define CSR_HPMCOUNTER4 0xc04 +#define CSR_HPMCOUNTER5 0xc05 +#define CSR_HPMCOUNTER6 0xc06 +#define CSR_HPMCOUNTER7 0xc07 +#define CSR_HPMCOUNTER8 0xc08 +#define CSR_HPMCOUNTER9 0xc09 +#define CSR_HPMCOUNTER10 0xc0a +#define CSR_HPMCOUNTER11 0xc0b +#define CSR_HPMCOUNTER12 0xc0c +#define CSR_HPMCOUNTER13 0xc0d +#define CSR_HPMCOUNTER14 0xc0e +#define CSR_HPMCOUNTER15 0xc0f +#define CSR_HPMCOUNTER16 0xc10 +#define CSR_HPMCOUNTER17 0xc11 +#define CSR_HPMCOUNTER18 0xc12 +#define CSR_HPMCOUNTER19 0xc13 +#define CSR_HPMCOUNTER20 0xc14 +#define CSR_HPMCOUNTER21 0xc15 +#define CSR_HPMCOUNTER22 0xc16 +#define CSR_HPMCOUNTER23 0xc17 +#define CSR_HPMCOUNTER24 0xc18 +#define CSR_HPMCOUNTER25 0xc19 +#define CSR_HPMCOUNTER26 0xc1a +#define CSR_HPMCOUNTER27 0xc1b +#define CSR_HPMCOUNTER28 0xc1c +#define CSR_HPMCOUNTER29 0xc1d +#define CSR_HPMCOUNTER30 0xc1e +#define CSR_HPMCOUNTER31 0xc1f +#define CSR_SSTATUS 0x100 +#define CSR_SIE 0x104 +#define CSR_STVEC 0x105 +#define CSR_SSCRATCH 0x140 +#define CSR_SEPC 0x141 +#define CSR_SCAUSE 0x142 +#define CSR_SBADADDR 0x143 +#define CSR_SIP 0x144 +#define CSR_SPTBR 0x180 +#define CSR_MSTATUS 0x300 +#define CSR_MISA 0x301 +#define CSR_MEDELEG 0x302 +#define CSR_MIDELEG 0x303 +#define CSR_MIE 0x304 +#define CSR_MTVEC 0x305 +#define CSR_MCOUNTEREN 0x306 +#define CSR_MSCRATCH 0x340 +#define CSR_MEPC 0x341 +#define CSR_MCAUSE 0x342 +#define CSR_MBADADDR 0x343 +#define CSR_MTVAL 0x343 +#define CSR_MIP 0x344 +#define CSR_PMPCFG0 0x3a0 +#define CSR_PMPCFG1 0x3a1 +#define CSR_PMPCFG2 0x3a2 +#define CSR_PMPCFG3 0x3a3 +#define CSR_PMPADDR0 0x3b0 +#define CSR_PMPADDR1 0x3b1 +#define CSR_PMPADDR2 0x3b2 +#define CSR_PMPADDR3 0x3b3 +#define CSR_PMPADDR4 0x3b4 +#define CSR_PMPADDR5 0x3b5 +#define CSR_PMPADDR6 0x3b6 +#define CSR_PMPADDR7 0x3b7 +#define CSR_PMPADDR8 0x3b8 +#define CSR_PMPADDR9 0x3b9 +#define CSR_PMPADDR10 0x3ba +#define CSR_PMPADDR11 0x3bb +#define CSR_PMPADDR12 0x3bc +#define CSR_PMPADDR13 0x3bd +#define CSR_PMPADDR14 0x3be +#define CSR_PMPADDR15 0x3bf +#define CSR_TSELECT 0x7a0 +#define CSR_TDATA1 0x7a1 +#define CSR_TDATA2 0x7a2 +#define CSR_TDATA3 0x7a3 +#define CSR_DCSR 0x7b0 +#define CSR_DPC 0x7b1 +#define CSR_DSCRATCH 0x7b2 +#define CSR_MCYCLE 0xb00 +#define CSR_MINSTRET 0xb02 +#define CSR_MHPMCOUNTER3 0xb03 +#define CSR_MHPMCOUNTER4 0xb04 +#define CSR_MHPMCOUNTER5 0xb05 +#define CSR_MHPMCOUNTER6 0xb06 +#define CSR_MHPMCOUNTER7 0xb07 +#define CSR_MHPMCOUNTER8 0xb08 +#define CSR_MHPMCOUNTER9 0xb09 +#define CSR_MHPMCOUNTER10 0xb0a +#define CSR_MHPMCOUNTER11 0xb0b +#define CSR_MHPMCOUNTER12 0xb0c +#define CSR_MHPMCOUNTER13 0xb0d +#define CSR_MHPMCOUNTER14 0xb0e +#define CSR_MHPMCOUNTER15 0xb0f +#define CSR_MHPMCOUNTER16 0xb10 +#define CSR_MHPMCOUNTER17 0xb11 +#define CSR_MHPMCOUNTER18 0xb12 +#define CSR_MHPMCOUNTER19 0xb13 +#define CSR_MHPMCOUNTER20 0xb14 +#define CSR_MHPMCOUNTER21 0xb15 +#define CSR_MHPMCOUNTER22 0xb16 +#define CSR_MHPMCOUNTER23 0xb17 +#define CSR_MHPMCOUNTER24 0xb18 +#define CSR_MHPMCOUNTER25 0xb19 +#define CSR_MHPMCOUNTER26 0xb1a +#define CSR_MHPMCOUNTER27 0xb1b +#define CSR_MHPMCOUNTER28 0xb1c +#define CSR_MHPMCOUNTER29 0xb1d +#define CSR_MHPMCOUNTER30 0xb1e +#define CSR_MHPMCOUNTER31 0xb1f +#define CSR_MUCOUNTEREN 0x320 +#define CSR_MSCOUNTEREN 0x321 +#define CSR_MHPMEVENT3 0x323 +#define CSR_MHPMEVENT4 0x324 +#define CSR_MHPMEVENT5 0x325 +#define CSR_MHPMEVENT6 0x326 +#define CSR_MHPMEVENT7 0x327 +#define CSR_MHPMEVENT8 0x328 +#define CSR_MHPMEVENT9 0x329 +#define CSR_MHPMEVENT10 0x32a +#define CSR_MHPMEVENT11 0x32b +#define CSR_MHPMEVENT12 0x32c +#define CSR_MHPMEVENT13 0x32d +#define CSR_MHPMEVENT14 0x32e +#define CSR_MHPMEVENT15 0x32f +#define CSR_MHPMEVENT16 0x330 +#define CSR_MHPMEVENT17 0x331 +#define CSR_MHPMEVENT18 0x332 +#define CSR_MHPMEVENT19 0x333 +#define CSR_MHPMEVENT20 0x334 +#define CSR_MHPMEVENT21 0x335 +#define CSR_MHPMEVENT22 0x336 +#define CSR_MHPMEVENT23 0x337 +#define CSR_MHPMEVENT24 0x338 +#define CSR_MHPMEVENT25 0x339 +#define CSR_MHPMEVENT26 0x33a +#define CSR_MHPMEVENT27 0x33b +#define CSR_MHPMEVENT28 0x33c +#define CSR_MHPMEVENT29 0x33d +#define CSR_MHPMEVENT30 0x33e +#define CSR_MHPMEVENT31 0x33f +#define CSR_MVENDORID 0xf11 +#define CSR_MARCHID 0xf12 +#define CSR_MIMPID 0xf13 +#define CSR_MHARTID 0xf14 +#define CSR_CYCLEH 0xc80 +#define CSR_TIMEH 0xc81 +#define CSR_INSTRETH 0xc82 +#define CSR_HPMCOUNTER3H 0xc83 +#define CSR_HPMCOUNTER4H 0xc84 +#define CSR_HPMCOUNTER5H 0xc85 +#define CSR_HPMCOUNTER6H 0xc86 +#define CSR_HPMCOUNTER7H 0xc87 +#define CSR_HPMCOUNTER8H 0xc88 +#define CSR_HPMCOUNTER9H 0xc89 +#define CSR_HPMCOUNTER10H 0xc8a +#define CSR_HPMCOUNTER11H 0xc8b +#define CSR_HPMCOUNTER12H 0xc8c +#define CSR_HPMCOUNTER13H 0xc8d +#define CSR_HPMCOUNTER14H 0xc8e +#define CSR_HPMCOUNTER15H 0xc8f +#define CSR_HPMCOUNTER16H 0xc90 +#define CSR_HPMCOUNTER17H 0xc91 +#define CSR_HPMCOUNTER18H 0xc92 +#define CSR_HPMCOUNTER19H 0xc93 +#define CSR_HPMCOUNTER20H 0xc94 +#define CSR_HPMCOUNTER21H 0xc95 +#define CSR_HPMCOUNTER22H 0xc96 +#define CSR_HPMCOUNTER23H 0xc97 +#define CSR_HPMCOUNTER24H 0xc98 +#define CSR_HPMCOUNTER25H 0xc99 +#define CSR_HPMCOUNTER26H 0xc9a +#define CSR_HPMCOUNTER27H 0xc9b +#define CSR_HPMCOUNTER28H 0xc9c +#define CSR_HPMCOUNTER29H 0xc9d +#define CSR_HPMCOUNTER30H 0xc9e +#define CSR_HPMCOUNTER31H 0xc9f +#define CSR_MCYCLEH 0xb80 +#define CSR_MINSTRETH 0xb82 +#define CSR_MHPMCOUNTER3H 0xb83 +#define CSR_MHPMCOUNTER4H 0xb84 +#define CSR_MHPMCOUNTER5H 0xb85 +#define CSR_MHPMCOUNTER6H 0xb86 +#define CSR_MHPMCOUNTER7H 0xb87 +#define CSR_MHPMCOUNTER8H 0xb88 +#define CSR_MHPMCOUNTER9H 0xb89 +#define CSR_MHPMCOUNTER10H 0xb8a +#define CSR_MHPMCOUNTER11H 0xb8b +#define CSR_MHPMCOUNTER12H 0xb8c +#define CSR_MHPMCOUNTER13H 0xb8d +#define CSR_MHPMCOUNTER14H 0xb8e +#define CSR_MHPMCOUNTER15H 0xb8f +#define CSR_MHPMCOUNTER16H 0xb90 +#define CSR_MHPMCOUNTER17H 0xb91 +#define CSR_MHPMCOUNTER18H 0xb92 +#define CSR_MHPMCOUNTER19H 0xb93 +#define CSR_MHPMCOUNTER20H 0xb94 +#define CSR_MHPMCOUNTER21H 0xb95 +#define CSR_MHPMCOUNTER22H 0xb96 +#define CSR_MHPMCOUNTER23H 0xb97 +#define CSR_MHPMCOUNTER24H 0xb98 +#define CSR_MHPMCOUNTER25H 0xb99 +#define CSR_MHPMCOUNTER26H 0xb9a +#define CSR_MHPMCOUNTER27H 0xb9b +#define CSR_MHPMCOUNTER28H 0xb9c +#define CSR_MHPMCOUNTER29H 0xb9d +#define CSR_MHPMCOUNTER30H 0xb9e +#define CSR_MHPMCOUNTER31H 0xb9f + +/* === CLIC CSR Registers === */ +#define CSR_MTVT 0x307 +#define CSR_MNXTI 0x345 +#define CSR_MINTSTATUS 0x346 +#define CSR_MSCRATCHCSW 0x348 +#define CSR_MSCRATCHCSWL 0x349 +#define CSR_MCLICBASE 0x350 + +/* === Nuclei custom CSR Registers === */ +#define CSR_MCOUNTINHIBIT 0x320 +#define CSR_MILM_CTL 0x7C0 +#define CSR_MDLM_CTL 0x7C1 +#define CSR_MNVEC 0x7C3 +#define CSR_MSUBM 0x7C4 +#define CSR_MDCAUSE 0x7C9 +#define CSR_MCACHE_CTL 0x7CA +#define CSR_MMISC_CTL 0x7D0 +#define CSR_MSAVESTATUS 0x7D6 +#define CSR_MSAVEEPC1 0x7D7 +#define CSR_MSAVECAUSE1 0x7D8 +#define CSR_MSAVEEPC2 0x7D9 +#define CSR_MSAVECAUSE2 0x7DA +#define CSR_MSAVEDCAUSE1 0x7DB +#define CSR_MSAVEDCAUSE2 0x7DC +#define CSR_PUSHMSUBM 0x7EB +#define CSR_MTVT2 0x7EC +#define CSR_JALMNXTI 0x7ED +#define CSR_PUSHMCAUSE 0x7EE +#define CSR_PUSHMEPC 0x7EF +#define CSR_MPPICFG_INFO 0x7F0 +#define CSR_MFIOCFG_INFO 0x7F1 +#define CSR_SLEEPVALUE 0x811 +#define CSR_TXEVT 0x812 +#define CSR_WFE 0x810 +#define CSR_MICFG_INFO 0xFC0 +#define CSR_MDCFG_INFO 0xFC1 +#define CSR_MCFG_INFO 0xFC2 + +/** @} */ /** End of Doxygen Group NMSIS_Core_CSR_Registers **/ + +/* Exception Code in MCAUSE CSR */ +#define CAUSE_MISALIGNED_FETCH 0x0 +#define CAUSE_FAULT_FETCH 0x1 +#define CAUSE_ILLEGAL_INSTRUCTION 0x2 +#define CAUSE_BREAKPOINT 0x3 +#define CAUSE_MISALIGNED_LOAD 0x4 +#define CAUSE_FAULT_LOAD 0x5 +#define CAUSE_MISALIGNED_STORE 0x6 +#define CAUSE_FAULT_STORE 0x7 +#define CAUSE_USER_ECALL 0x8 +#define CAUSE_SUPERVISOR_ECALL 0x9 +#define CAUSE_HYPERVISOR_ECALL 0xa +#define CAUSE_MACHINE_ECALL 0xb + +/* Exception Subcode in MDCAUSE CSR */ +#define DCAUSE_FAULT_FETCH_PMP 0x1 +#define DCAUSE_FAULT_FETCH_INST 0x2 + +#define DCAUSE_FAULT_LOAD_PMP 0x1 +#define DCAUSE_FAULT_LOAD_INST 0x2 +#define DCAUSE_FAULT_LOAD_NICE 0x3 + +#define DCAUSE_FAULT_STORE_PMP 0x1 +#define DCAUSE_FAULT_STORE_INST 0x2 + +/** @} */ /** End of Doxygen Group NMSIS_Core_CSR_Encoding **/ + +#ifdef __cplusplus +} +#endif +#endif /* __RISCV_ENCODING_H__ */ diff --git a/external/arch/riscv/nuclei/NMSIS/rules.mk b/external/arch/riscv/nuclei/NMSIS/rules.mk new file mode 100644 index 000000000..17c320bf9 --- /dev/null +++ b/external/arch/riscv/nuclei/NMSIS/rules.mk @@ -0,0 +1,3 @@ +LOCAL_DIR := $(GET_LOCAL_DIR) + +GLOBAL_INCLUDES += $(LOCAL_DIR)/Core/Include diff --git a/external/platform/hbird/NMSIS/hbird.h b/external/platform/hbird/NMSIS/hbird.h new file mode 100644 index 000000000..a2afb00ea --- /dev/null +++ b/external/platform/hbird/NMSIS/hbird.h @@ -0,0 +1,459 @@ +/****************************************************************************** + * @file hbird.h + * @brief NMSIS Core Peripheral Access Layer Header File for + * Nuclei HummingBird evaluation SoC which support Nuclei N/NX class cores + * @version V1.00 + * @date 22. Nov 2019 + ******************************************************************************/ +/* + * Copyright (c) 2019 Nuclei Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __HBIRD_H__ +#define __HBIRD_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** @addtogroup Nuclei + * @{ + */ + + +/** @addtogroup hbird + * @{ + */ + + +/** @addtogroup Configuration_of_NMSIS + * @{ + */ + + + +/* =========================================================================================================================== */ +/* ================ Interrupt Number Definition ================ */ +/* =========================================================================================================================== */ + +typedef enum IRQn +{ +/* ======================================= Nuclei Core Specific Interrupt Numbers ======================================== */ + + Reserved0_IRQn = 0, /*!< Internal reserved */ + Reserved1_IRQn = 1, /*!< Internal reserved */ + Reserved2_IRQn = 2, /*!< Internal reserved */ + SysTimerSW_IRQn = 3, /*!< System Timer SW interrupt */ + Reserved3_IRQn = 4, /*!< Internal reserved */ + Reserved4_IRQn = 5, /*!< Internal reserved */ + Reserved5_IRQn = 6, /*!< Internal reserved */ + SysTimer_IRQn = 7, /*!< System Timer Interrupt */ + Reserved6_IRQn = 8, /*!< Internal reserved */ + Reserved7_IRQn = 9, /*!< Internal reserved */ + Reserved8_IRQn = 10, /*!< Internal reserved */ + Reserved9_IRQn = 11, /*!< Internal reserved */ + Reserved10_IRQn = 12, /*!< Internal reserved */ + Reserved11_IRQn = 13, /*!< Internal reserved */ + Reserved12_IRQn = 14, /*!< Internal reserved */ + Reserved13_IRQn = 15, /*!< Internal reserved */ + Reserved14_IRQn = 16, /*!< Internal reserved */ + Reserved15_IRQn = 17, /*!< Internal reserved */ + Reserved16_IRQn = 18, /*!< Internal reserved */ + +/* =========================================== hbird Specific Interrupt Numbers ========================================= */ +/* ToDo: add here your device specific external interrupt numbers. 19~1023 is reserved number for user. Maxmum interrupt supported + could get from clicinfo.NUM_INTERRUPT. According the interrupt handlers defined in startup_Device.s + eg.: Interrupt for Timer#1 eclic_tim1_handler -> TIM1_IRQn */ + SOC_INT19_IRQn = 19, /*!< Device Interrupt */ + SOC_INT20_IRQn = 20, /*!< Device Interrupt */ + SOC_INT21_IRQn = 21, /*!< Device Interrupt */ + SOC_INT22_IRQn = 22, /*!< Device Interrupt */ + SOC_INT23_IRQn = 23, /*!< Device Interrupt */ + SOC_INT24_IRQn = 24, /*!< Device Interrupt */ + SOC_INT25_IRQn = 25, /*!< Device Interrupt */ + SOC_INT26_IRQn = 26, /*!< Device Interrupt */ + SOC_INT27_IRQn = 27, /*!< Device Interrupt */ + SOC_INT28_IRQn = 28, /*!< Device Interrupt */ + SOC_INT29_IRQn = 29, /*!< Device Interrupt */ + SOC_INT30_IRQn = 30, /*!< Device Interrupt */ + SOC_INT31_IRQn = 31, /*!< Device Interrupt */ + SOC_INT32_IRQn = 32, /*!< Device Interrupt */ + SOC_INT33_IRQn = 33, /*!< Device Interrupt */ + SOC_INT34_IRQn = 34, /*!< Device Interrupt */ + SOC_INT35_IRQn = 35, /*!< Device Interrupt */ + SOC_INT36_IRQn = 36, /*!< Device Interrupt */ + SOC_INT37_IRQn = 37, /*!< Device Interrupt */ + SOC_INT38_IRQn = 38, /*!< Device Interrupt */ + SOC_INT39_IRQn = 39, /*!< Device Interrupt */ + SOC_INT40_IRQn = 40, /*!< Device Interrupt */ + SOC_INT41_IRQn = 41, /*!< Device Interrupt */ + SOC_INT42_IRQn = 42, /*!< Device Interrupt */ + SOC_INT43_IRQn = 43, /*!< Device Interrupt */ + SOC_INT44_IRQn = 44, /*!< Device Interrupt */ + SOC_INT45_IRQn = 45, /*!< Device Interrupt */ + SOC_INT46_IRQn = 46, /*!< Device Interrupt */ + SOC_INT47_IRQn = 47, /*!< Device Interrupt */ + SOC_INT48_IRQn = 48, /*!< Device Interrupt */ + SOC_INT49_IRQn = 49, /*!< Device Interrupt */ + SOC_INT50_IRQn = 50, /*!< Device Interrupt */ + SOC_INT_MAX, +} IRQn_Type; + +/* =========================================================================================================================== */ +/* ================ Exception Code Definition ================ */ +/* =========================================================================================================================== */ + +typedef enum EXCn { +/* ======================================= Nuclei N/NX Specific Exception Code ======================================== */ + InsUnalign_EXCn = 0, /*!< Instruction address misaligned */ + InsAccFault_EXCn = 1, /*!< Instruction access fault */ + IlleIns_EXCn = 2, /*!< Illegal instruction */ + Break_EXCn = 3, /*!< Beakpoint */ + LdAddrUnalign_EXCn = 4, /*!< Load address misaligned */ + LdFault_EXCn = 5, /*!< Load access fault */ + StAddrUnalign_EXCn = 6, /*!< Store or AMO address misaligned */ + StAccessFault_EXCn = 7, /*!< Store or AMO access fault */ + UmodeEcall_EXCn = 8, /*!< Environment call from User mode */ + MmodeEcall_EXCn = 11, /*!< Environment call from Machine mode */ + NMI_EXCn = 0xfff, /*!< NMI interrupt */ +} EXCn_Type; + +/* =========================================================================================================================== */ +/* ================ Processor and Core Peripheral Section ================ */ +/* =========================================================================================================================== */ + +/* ToDo: set the defines according your Device */ +/* ToDo: define the correct core revision */ +#if __riscv_xlen == 32 + +#ifndef __NUCLEI_CORE_REV +#define __NUCLEI_N_REV 0x0104 /*!< Core Revision r1p4 */ +#else +#define __NUCLEI_N_REV __NUCLEI_CORE_REV +#endif + +#elif __riscv_xlen == 64 + +#ifndef __NUCLEI_CORE_REV +#define __NUCLEI_NX_REV 0x0100 /*!< Core Revision r1p0 */ +#else +#define __NUCLEI_NX_REV __NUCLEI_CORE_REV +#endif + +#endif /* __riscv_xlen == 64 */ + +/* ToDo: define the correct core features for the hbird */ +#define __ECLIC_PRESENT 1 /*!< Set to 1 if ECLIC is present */ +#define __ECLIC_BASEADDR 0x0C000000UL /*!< Set to ECLIC baseaddr of your device */ + +//#define __ECLIC_INTCTLBITS 3 /*!< Set to 1 - 8, the number of hardware bits are actually implemented in the clicintctl registers. */ +#define __ECLIC_INTNUM 51 /*!< Set to 1 - 1024, total interrupt number of ECLIC Unit */ +#define __SYSTIMER_PRESENT 1 /*!< Set to 1 if System Timer is present */ +#define __SYSTIMER_BASEADDR 0x02000000UL /*!< Set to SysTimer baseaddr of your device */ + +/*!< Set to 0, 1, or 2, 0 not present, 1 single floating point unit present, 2 double floating point unit present */ +#if !defined(__riscv_flen) +#define __FPU_PRESENT 0 +#elif __riscv_flen == 32 +#define __FPU_PRESENT 1 +#else +#define __FPU_PRESENT 2 +#endif + +#define __DSP_PRESENT 1 /*!< Set to 1 if DSP is present */ +#define __PMP_PRESENT 1 /*!< Set to 1 if PMP is present */ +#define __PMP_ENTRY_NUM 16 /*!< Set to 8 or 16, the number of PMP entries */ +#define __ICACHE_PRESENT 0 /*!< Set to 1 if I-Cache is present */ +#define __DCACHE_PRESENT 0 /*!< Set to 1 if D-Cache is present */ +#define __Vendor_SysTickConfig 0 /*!< Set to 1 if different SysTick Config is used */ +#define __Vendor_EXCEPTION 0 /*!< Set to 1 if vendor exception hander is present */ + +/** @} */ /* End of group Configuration_of_CMSIS */ + + +#include /*!< Nuclei N/NX class processor and core peripherals */ +/* ToDo: include your system_hbird.h file + replace 'Device' with your device name */ +#include "system_hbird.h" /*!< hbird System */ + + +/* ======================================== Start of section using anonymous unions ======================================== */ +#if defined (__GNUC__) + /* anonymous unions are enabled by default */ +#else + #warning Not supported compiler type +#endif + +#define RTC_FREQ 32768 +// The TIMER frequency is just the RTC frequency +#define SOC_TIMER_FREQ RTC_FREQ +/* =========================================================================================================================== */ +/* ================ Device Specific Peripheral Section ================ */ +/* =========================================================================================================================== */ + + +/** @addtogroup Device_Peripheral_peripherals + * @{ + */ + +/**************************************************************************** + * Platform definitions + *****************************************************************************/ +// IOF Mappings +#define IOF0_SPI1_MASK _AC(0x000007FC,UL) +#define SPI11_NUM_SS (4) +#define IOF_SPI1_SS0 (2u) +#define IOF_SPI1_SS1 (8u) +#define IOF_SPI1_SS2 (9u) +#define IOF_SPI1_SS3 (10u) +#define IOF_SPI1_MOSI (3u) +#define IOF_SPI1_MISO (4u) +#define IOF_SPI1_SCK (5u) +#define IOF_SPI1_DQ0 (3u) +#define IOF_SPI1_DQ1 (4u) +#define IOF_SPI1_DQ2 (6u) +#define IOF_SPI1_DQ3 (7u) + +#define IOF0_SPI2_MASK _AC(0xFC000000,UL) +#define SPI2_NUM_SS (1) +#define IOF_SPI2_SS0 (26u) +#define IOF_SPI2_MOSI (27u) +#define IOF_SPI2_MISO (28u) +#define IOF_SPI2_SCK (29u) +#define IOF_SPI2_DQ0 (27u) +#define IOF_SPI2_DQ1 (28u) +#define IOF_SPI2_DQ2 (30u) +#define IOF_SPI2_DQ3 (31u) + +#define IOF0_UART0_MASK _AC(0x00030000, UL) +#define IOF_UART0_RX (16u) +#define IOF_UART0_TX (17u) + +#define IOF0_UART1_MASK _AC(0x03000000, UL) +#define IOF_UART1_RX (24u) +#define IOF_UART1_TX (25u) + +#define IOF0_I2C_MASK _AC(0x00003000, UL) +#define IOF_I2C_SDA (12u) +#define IOF_I2C_SCL (13u) + +#define IOF1_PWM0_MASK _AC(0x0000000F, UL) +#define IOF1_PWM1_MASK _AC(0x00780000, UL) +#define IOF1_PWM2_MASK _AC(0x00003C00, UL) + +// Interrupt Numbers +#define SOC_ECLIC_NUM_INTERRUPTS 32 +#define SOC_ECLIC_INT_GPIO_BASE 19 + +// Interrupt Handler Definitions +#define SOC_MTIMER_HANDLER eclic_mtip_handler +#define SOC_SOFTINT_HANDLER eclic_msip_handler + +#define GPIO_BIT_ALL_ZERO (0x0) +#define GPIO_BIT_ALL_ONE (0xFFFFFFFF) + +/** + * @brief GPIO + */ +typedef struct { /*!< GPIO Structure */ + __IOM uint32_t INPUT_VAL; + __IOM uint32_t INPUT_EN; + __IOM uint32_t OUTPUT_EN; + __IOM uint32_t OUTPUT_VAL; + __IOM uint32_t PULLUP_EN; + __IOM uint32_t DRIVE; + __IOM uint32_t RISE_IE; + __IOM uint32_t RISE_IP; + __IOM uint32_t FALL_IE; + __IOM uint32_t FALL_IP; + __IOM uint32_t HIGH_IE; + __IOM uint32_t HIGH_IP; + __IOM uint32_t LOW_IE; + __IOM uint32_t LOW_IP; + __IOM uint32_t IOF_EN; + __IOM uint32_t IOF_SEL; + __IOM uint32_t OUTPUT_XOR; +} GPIO_TypeDef; + +/** + * @brief UART + */ +typedef struct { + __IOM uint32_t TXFIFO; + __IOM uint32_t RXFIFO; + __IOM uint32_t TXCTRL; + __IOM uint32_t RXCTRL; + __IOM uint32_t IE; + __IOM uint32_t IP; + __IOM uint32_t DIV; +} UART_TypeDef; + +/** + * @brief PWM + */ +typedef struct { + __IOM uint32_t CFG; + uint32_t RESERVED0; + __IOM uint32_t COUNT; + uint32_t RESERVED1; + __IOM uint32_t S; + uint32_t RESERVED2[3]; + __IOM uint32_t CMP0; + __IOM uint32_t CMP1; + __IOM uint32_t CMP2; + __IOM uint32_t CMP3; +} PWM_TypeDef; + +/** + * @brief QSPI + */ +typedef struct { + __IOM uint32_t SCKDIV; + __IOM uint32_t SCKMODE; + __IOM uint32_t RESERVED0[2]; + __IOM uint32_t CSID; + __IOM uint32_t CSDEF; + __IOM uint32_t CSMODE; + __IOM uint32_t RESERVED1[3]; + __IOM uint32_t DELAY0; + __IOM uint32_t DELAY1; + __IOM uint32_t RESERVED2[4]; + __IOM uint32_t FMT; + __IOM uint32_t RESERVED3; + __IOM uint32_t TXDATA; + __IOM uint32_t RXDATA; + __IOM uint32_t TXMARK; + __IOM uint32_t RXMARK; + __IOM uint32_t RESERVED4[2]; + __IOM uint32_t FCTRL; + __IOM uint32_t FFMT; + __IOM uint32_t RESERVED5[2]; + __IOM uint32_t IE; + __IOM uint32_t IP; +} QSPI_TypeDef; + +/** + * @brief I2C + */ +typedef struct { + __IOM uint8_t PRERlo; + __IOM uint8_t PRERhi; + __IOM uint8_t CTR; + __IOM uint8_t TXRXR; /* TXR and RXR in same address */ + __IOM uint8_t CSR; /* CR and SR in same address */ +} I2C_TypeDef; + +/*@}*/ /* end of group hbird_Peripherals */ + + +/* ========================================= End of section using anonymous unions ========================================= */ +#if defined (__GNUC__) + /* anonymous unions are enabled by default */ +#else + #warning Not supported compiler type +#endif + + +/* =========================================================================================================================== */ +/* ================ Device Specific Peripheral Address Map ================ */ +/* =========================================================================================================================== */ + + +/* ToDo: add here your device peripherals base addresses + following is an example for timer */ +/** @addtogroup Device_Peripheral_peripheralAddr + * @{ + */ +/* Peripheral and SRAM base address */ +#define QSPI_FLASH_BASE (0x20000000UL) /*!< (FLASH ) Base Address */ +#define ONCHIP_ROM_BASE (0x00001000UL) /*!< (ROM ) Base Address */ +#define ONCHIP_ILM_BASE (0x80000000UL) /*!< (ILM ) Base Address */ +#define ONCHIP_DLM_BASE (0x90000000UL) /*!< (DLM ) Base Address */ +#define HBIRD_PERIPH_BASE (0x10000000UL) /*!< (Peripheral) Base Address */ + +/* Peripheral memory map */ +/* Fast-IO Interfaced IP */ +#define GPIO_BASE (HBIRD_PERIPH_BASE + 0x12000) /*!< (GPIO) Base Address */ +/* PPI Interfaced IP */ +#define UART0_BASE (HBIRD_PERIPH_BASE + 0x13000) /*!< (UART0) Base Address */ +#define QSPI0_BASE (HBIRD_PERIPH_BASE + 0x14000) /*!< (QSPI0) Base Address */ +#define PWM0_BASE (HBIRD_PERIPH_BASE + 0x15000) /*!< (PWM0) Base Address */ +#define UART1_BASE (HBIRD_PERIPH_BASE + 0x23000) /*!< (UART1) Base Address */ +#define QSPI1_BASE (HBIRD_PERIPH_BASE + 0x24000) /*!< (QSPI1) Base Address */ +#define PWM1_BASE (HBIRD_PERIPH_BASE + 0x25000) /*!< (PWM1) Base Address */ +#define QSPI2_BASE (HBIRD_PERIPH_BASE + 0x34000) /*!< (QSPI2) Base Address */ +#define PWM2_BASE (HBIRD_PERIPH_BASE + 0x35000) /*!< (PWM2) Base Address */ +#define I2C_BASE (HBIRD_PERIPH_BASE + 0x42000) /*!< (I2C Master) Base Address */ + +/** @} */ /* End of group Device_Peripheral_peripheralAddr */ + + +/* =========================================================================================================================== */ +/* ================ Peripheral declaration ================ */ +/* =========================================================================================================================== */ + + +/* ToDo: add here your device peripherals pointer definitions + following is an example for timer */ +/** @addtogroup Device_Peripheral_declaration + * @{ + */ +#define GPIO ((GPIO_TypeDef *) GPIO_BASE) +#define UART0 ((UART_TypeDef *) UART0_BASE) +#define QSPI0 ((QSPI_TypeDef *) QSPI0_BASE) +#define PWM0 ((PWM_TypeDef *) PWM0_BASE) +#define UART1 ((UART_TypeDef *) UART1_BASE) +#define QSPI1 ((QSPI_TypeDef *) QSPI1_BASE) +#define PWM1 ((PWM_TypeDef *) PWM1_BASE) +#define QSPI2 ((QSPI_TypeDef *) QSPI2_BASE) +#define PWM2 ((PWM_TypeDef *) PWM2_BASE) +#define I2C ((I2C_TypeDef *) I2C_BASE) + +// Helper functions +#define _REG8(p, i) (*(volatile uint8_t *) ((p) + (i))) +#define _REG32(p, i) (*(volatile uint32_t *) ((p) + (i))) +#define _REG32P(p, i) ((volatile uint32_t *) ((p) + (i))) + +#define GPIO_REG(offset) _REG32(GPIO_BASE, offset) +#define PWM0_REG(offset) _REG32(PWM0_BASE, offset) +#define PWM1_REG(offset) _REG32(PWM1_BASE, offset) +#define PWM2_REG(offset) _REG32(PWM2_BASE, offset) +#define SPI0_REG(offset) _REG32(QSPI0_BASE, offset) +#define SPI1_REG(offset) _REG32(QSPI1_BASE, offset) +#define SPI2_REG(offset) _REG32(QSPI2_BASE, offset) +#define UART0_REG(offset) _REG32(UART0_BASE, offset) +#define UART1_REG(offset) _REG32(UART1_BASE, offset) +#define I2C_REG(offset) _REG8(I2C_BASE, offset) + +// Misc + +#define NUM_GPIO 32 + +extern uint32_t get_cpu_freq(void); +extern void delay_1ms(uint32_t count); + +/** @} */ /* End of group hbird */ + +/** @} */ /* End of group Nuclei */ + +#ifdef __cplusplus +} +#endif + +#endif /* __HBIRD_H__ */ diff --git a/external/platform/hbird/NMSIS/rules.mk b/external/platform/hbird/NMSIS/rules.mk new file mode 100644 index 000000000..b28200a14 --- /dev/null +++ b/external/platform/hbird/NMSIS/rules.mk @@ -0,0 +1,4 @@ +LOCAL_DIR := $(GET_LOCAL_DIR) + +GLOBAL_INCLUDES += $(LOCAL_DIR) + diff --git a/external/platform/hbird/NMSIS/system_hbird.h b/external/platform/hbird/NMSIS/system_hbird.h new file mode 100644 index 000000000..0c81afbab --- /dev/null +++ b/external/platform/hbird/NMSIS/system_hbird.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2009-2018 Arm Limited. All rights reserved. + * Copyright (c) 2019 Nuclei Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/******************************************************************************* + * @file system_hbird.h + * @brief NMSIS Nuclei N/NX Device Peripheral Access Layer Header File for + * Device + * @version V1.00 + * @date 17. Dec 2019 + ******************************************************************************/ + +#ifndef __SYSTEM_HBIRD_H__ /* ToDo: replace '' with your device name */ +#define __SYSTEM_HBIRD_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +extern uint32_t SystemCoreClock; /*!< System Clock Frequency (Core Clock) */ + +/** + * \brief Setup the microcontroller system. + * \details + * Initialize the System and update the SystemCoreClock variable. + */ +extern void SystemInit (void); + +/** + * \brief Update SystemCoreClock variable. + * \details + * Updates the SystemCoreClock with current core Clock retrieved from cpu registers. + */ +extern void SystemCoreClockUpdate (void); + +/** + * \brief Register an exception handler for exception code EXCn + */ +extern void Exception_Register_EXC(uint32_t EXCn, unsigned long exc_handler); + +/** + * \brief Get current exception handler for exception code EXCn + */ +extern unsigned long Exception_Get_EXC(uint32_t EXCn); + +/** + * \brief Initialize eclic config + */ +extern void ECLIC_Init(void); + +/** + * \brief Initialize a specific IRQ and register the handler + * \details + * This function set vector mode, trigger mode and polarity, interrupt level and priority, + * assign handler for specific IRQn. + */ +extern int32_t ECLIC_Register_IRQ(IRQn_Type IRQn, uint8_t shv, ECLIC_TRIGGER_Type trig_mode, uint8_t lvl, uint8_t priority, void *handler); + +#ifdef __cplusplus +} +#endif + +#endif /* __SYSTEM_HBIRD_H__ */ diff --git a/external/platform/hbird/inc/hbird_gpio.h b/external/platform/hbird/inc/hbird_gpio.h new file mode 100644 index 000000000..f6c880962 --- /dev/null +++ b/external/platform/hbird/inc/hbird_gpio.h @@ -0,0 +1,56 @@ +// See LICENSE for license details. +#ifndef _HBIRD_GPIO_H +#define _HBIRD_GPIO_H + +#ifdef __cplusplus + extern "C" { +#endif + +#define GPIO_INPUT_VAL (0x00) +#define GPIO_INPUT_EN (0x04) +#define GPIO_OUTPUT_EN (0x08) +#define GPIO_OUTPUT_VAL (0x0C) +#define GPIO_PULLUP_EN (0x10) +#define GPIO_DRIVE (0x14) +#define GPIO_RISE_IE (0x18) +#define GPIO_RISE_IP (0x1C) +#define GPIO_FALL_IE (0x20) +#define GPIO_FALL_IP (0x24) +#define GPIO_HIGH_IE (0x28) +#define GPIO_HIGH_IP (0x2C) +#define GPIO_LOW_IE (0x30) +#define GPIO_LOW_IP (0x34) +#define GPIO_IOF_EN (0x38) +#define GPIO_IOF_SEL (0x3C) +#define GPIO_OUTPUT_XOR (0x40) + +typedef enum iof_func { + IOF_SEL_GPIO = 0, + IOF_SEL_0 = 1, + IOF_SEL_1 = 2 +} IOF_FUNC; + +typedef enum gpio_int_type { + GPIO_INT_RISE = 0, + GPIO_INT_FALL = 1, + GPIO_INT_HIGH = 2, + GPIO_INT_LOW = 3 +} GPIO_INT_TYPE; + +int32_t gpio_iof_config(GPIO_TypeDef *gpio, uint32_t mask, IOF_FUNC func); +int32_t gpio_enable_output(GPIO_TypeDef *gpio, uint32_t mask); +int32_t gpio_enable_input(GPIO_TypeDef *gpio, uint32_t mask); +int32_t gpio_write(GPIO_TypeDef *gpio, uint32_t mask, uint32_t value); +int32_t gpio_toggle(GPIO_TypeDef *gpio, uint32_t mask); +int32_t gpio_read(GPIO_TypeDef *gpio, uint32_t mask); +int32_t gpio_set_pue(GPIO_TypeDef *gpio, uint32_t mask, uint32_t value); +int32_t gpio_set_ds(GPIO_TypeDef *gpio, uint32_t mask, uint32_t value); +int32_t gpio_set_outxor(GPIO_TypeDef *gpio, uint32_t mask, uint32_t value); +int32_t gpio_enable_interrupt(GPIO_TypeDef *gpio, uint32_t mask, GPIO_INT_TYPE type); +int32_t gpio_disable_interrupt(GPIO_TypeDef *gpio, uint32_t mask, GPIO_INT_TYPE type); +int32_t gpio_clear_interrupt(GPIO_TypeDef *gpio, uint32_t mask, GPIO_INT_TYPE type); + +#ifdef __cplusplus +} +#endif +#endif /* _HBIRD_GPIO_H */ diff --git a/external/platform/hbird/inc/hbird_uart.h b/external/platform/hbird/inc/hbird_uart.h new file mode 100644 index 000000000..4af314cb6 --- /dev/null +++ b/external/platform/hbird/inc/hbird_uart.h @@ -0,0 +1,75 @@ +// See LICENSE for license details. + +#ifndef _HBIRD_UART_H +#define _HBIRD_UART_H + +#ifdef __cplusplus + extern "C" { +#endif + +/* Register offsets */ +#define UART_REG_TXFIFO 0x00 +#define UART_REG_RXFIFO 0x04 +#define UART_REG_TXCTRL 0x08 +#define UART_REG_RXCTRL 0x0c +#define UART_REG_IE 0x10 +#define UART_REG_IP 0x14 +#define UART_REG_DIV 0x18 + +/* TXCTRL register */ +#define UART_TXEN 0x1 +#define UART_TXWM(x) (((x) & 0xffff) << 16) + +/* RXCTRL register */ +#define UART_RXEN 0x1 +#define UART_RXWM(x) (((x) & 0xffff) << 16) + +/* IP register */ +#define UART_IP_TXWM 0x1 +#define UART_IP_RXWM 0x2 + +#define UART_TXFIFO_FULL (1<<31) +#define UART_RXFIFO_EMPTY (1<<31) + +#define UART_TXCTRL_TXCNT_OFS (16) +#define UART_TXCTRL_TXCNT_MASK (0x7 << UART_TXCTRL_TXCNT_OFS) +#define UART_TXCTRL_TXEN_OFS (0) +#define UART_TXCTRL_TXEN_MASK (0x1 << UART_TXCTRL_TXEN_OFS) +#define UART_TXCTRL_NSTOP_OFS (1) +#define UART_TXCTRL_NSTOP_MASK (0x1 << UART_TXCTRL_TXEN_OFS) + +#define UART_RXCTRL_RXCNT_OFS (16) +#define UART_RXCTRL_RXCNT_MASK (0x7 << UART_RXCTRL_RXCNT_OFS) +#define UART_RXCTRL_RXEN_OFS (0) +#define UART_RXCTRL_RXEN_MASK (0x1 << UART_RXCTRL_RXEN_OFS) + +#define UART_IE_TXIE_OFS (0) +#define UART_IE_TXIE_MASK (0x1 << UART_IE_TXIE_OFS) +#define UART_IE_RXIE_OFS (1) +#define UART_IE_RXIE_MASK (0x1 << UART_IE_RXIE_OFS) + +#define UART_IP_TXIP_OFS (0) +#define UART_IP_TXIP_MASK (0x1 << UART_IP_TXIP_OFS) +#define UART_IP_RXIP_OFS (1) +#define UART_IP_RXIP_MASK (0x1 << UART_IP_RXIP_OFS) + +typedef enum uart_stop_bit { + UART_STOP_BIT_1 = 0, + UART_STOP_BIT_2 = 1 +} UART_STOP_BIT; + +int32_t uart_init(UART_TypeDef *uart, uint32_t baudrate); +int32_t uart_config_stopbit(UART_TypeDef *uart, UART_STOP_BIT stopbit); +int32_t uart_write(UART_TypeDef *uart, uint8_t val); +uint8_t uart_read(UART_TypeDef *uart); +int32_t uart_set_tx_watermark(UART_TypeDef *uart, uint32_t watermark); +int32_t uart_enable_txint(UART_TypeDef *uart); +int32_t uart_disable_txint(UART_TypeDef *uart); +int32_t uart_set_rx_watermark(UART_TypeDef *uart, uint32_t watermark); +int32_t uart_enable_rxint(UART_TypeDef *uart); +int32_t uart_disable_rxint(UART_TypeDef *uart); + +#ifdef __cplusplus +} +#endif +#endif /* _HBIRD_UART_H */ diff --git a/external/platform/hbird/inc/nuclei_sdk_soc.h b/external/platform/hbird/inc/nuclei_sdk_soc.h new file mode 100644 index 000000000..eb46f0243 --- /dev/null +++ b/external/platform/hbird/inc/nuclei_sdk_soc.h @@ -0,0 +1,17 @@ +// See LICENSE for license details. +#ifndef _NUCLEI_SDK_SOC_H +#define _NUCLEI_SDK_SOC_H + +#ifdef __cplusplus + extern "C" { +#endif + +#include "hbird.h" +#include "hbird_uart.h" +#include "hbird_gpio.h" + + +#ifdef __cplusplus +} +#endif +#endif diff --git a/external/platform/hbird/rules.mk b/external/platform/hbird/rules.mk new file mode 100644 index 000000000..455925f80 --- /dev/null +++ b/external/platform/hbird/rules.mk @@ -0,0 +1,15 @@ +LOCAL_DIR := $(GET_LOCAL_DIR) + +MODULE := $(LOCAL_DIR) + +GLOBAL_INCLUDES += $(LOCAL_DIR)/inc + +MODULE_SRCS += \ + $(LOCAL_DIR)/src/hbird_common.c \ + $(LOCAL_DIR)/src/hbird_gpio.c \ + $(LOCAL_DIR)/src/hbird_uart.c \ + $(LOCAL_DIR)/src/system_hbird.c + +include $(LOCAL_DIR)/NMSIS/rules.mk + +include make/module.mk diff --git a/external/platform/hbird/src/hbird_common.c b/external/platform/hbird/src/hbird_common.c new file mode 100644 index 000000000..20c764c52 --- /dev/null +++ b/external/platform/hbird/src/hbird_common.c @@ -0,0 +1,69 @@ +#include "nuclei_sdk_soc.h" + +static uint32_t get_timer_freq() +{ + return SOC_TIMER_FREQ; +} + +uint32_t measure_cpu_freq(uint32_t n) +{ + uint32_t start_mcycle, delta_mcycle; + uint32_t start_mtime, delta_mtime; + uint32_t mtime_freq = get_timer_freq(); + + // Don't start measuruing until we see an mtime tick + uint32_t tmp = (uint32_t)SysTimer_GetLoadValue(); + do { + start_mtime = (uint32_t)SysTimer_GetLoadValue(); + start_mcycle = __RV_CSR_READ(CSR_MCYCLE); + } while (start_mtime == tmp); + + do { + delta_mtime = (uint32_t)SysTimer_GetLoadValue() - start_mtime; + delta_mcycle = __RV_CSR_READ(CSR_MCYCLE) - start_mcycle; + } while (delta_mtime < n); + + return (delta_mcycle / delta_mtime) * mtime_freq + + ((delta_mcycle % delta_mtime) * mtime_freq) / delta_mtime; +} + +uint32_t get_cpu_freq(void) +{ + uint32_t cpu_freq; + + // warm up + measure_cpu_freq(1); + // measure for real + cpu_freq = measure_cpu_freq(100); + + return cpu_freq; +} + +/** + * \brief delay a time in milliseconds + * \details + * provide API for delay + * \param[in] count: count in milliseconds + * \remarks + */ +void delay_1ms(uint32_t count) +{ + uint64_t start_mtime, delta_mtime; + uint64_t delay_ticks = (SOC_TIMER_FREQ * (uint64_t)count) / 1000; + + start_mtime = SysTimer_GetLoadValue(); + + do { + delta_mtime = SysTimer_GetLoadValue() - start_mtime; + } while (delta_mtime < delay_ticks); +} + +#ifdef SIMULATION_XLSPIKE +// never return for xlspike +void xlspike_exit(int status) +{ + // pass exit status via rxfifo register + UART0->RXFIFO = status; + uart_write(UART0, 4); +} +#endif diff --git a/external/platform/hbird/src/hbird_gpio.c b/external/platform/hbird/src/hbird_gpio.c new file mode 100644 index 000000000..1512f4463 --- /dev/null +++ b/external/platform/hbird/src/hbird_gpio.c @@ -0,0 +1,180 @@ +#include "hbird.h" +#include "hbird_gpio.h" + +int32_t gpio_iof_config(GPIO_TypeDef *gpio, uint32_t mask, IOF_FUNC func) +{ + if (__RARELY(gpio == NULL)) { + return -1; + } + switch (func) { + case IOF_SEL_GPIO: + gpio->IOF_EN &= ~mask; + break; + case IOF_SEL_0: + gpio->IOF_SEL &= ~mask; + gpio->IOF_EN |= mask; + break; + case IOF_SEL_1: + gpio->IOF_SEL |= mask; + gpio->IOF_EN |= mask; + break; + default: + break; + } + return 0; +} + +int32_t gpio_enable_output(GPIO_TypeDef *gpio, uint32_t mask) +{ + if (__RARELY(gpio == NULL)) { + return -1; + } + gpio->OUTPUT_EN |= mask; + gpio->INPUT_EN &= ~mask; + return 0; +} + +int32_t gpio_enable_input(GPIO_TypeDef *gpio, uint32_t mask) +{ + if (__RARELY(gpio == NULL)) { + return -1; + } + gpio->INPUT_EN |= mask; + gpio->OUTPUT_EN &= ~mask; + return 0; +} + +int32_t gpio_write(GPIO_TypeDef *gpio, uint32_t mask, uint32_t value) +{ + if (__RARELY(gpio == NULL)) { + return -1; + } + // If value != 0, mean set gpio pin high, otherwise set pin low + if (value) { + gpio->OUTPUT_VAL |= (mask); + } else { + gpio->OUTPUT_VAL &= ~(mask); + } + return 0; +} + +int32_t gpio_toggle(GPIO_TypeDef *gpio, uint32_t mask) +{ + if (__RARELY(gpio == NULL)) { + return -1; + } + gpio->OUTPUT_VAL = (mask ^ gpio->OUTPUT_VAL); + return 0; +} + + +int32_t gpio_read(GPIO_TypeDef *gpio, uint32_t mask) +{ + if (__RARELY(gpio == NULL)) { + return -1; + } + return gpio->INPUT_VAL & mask; +} + +int32_t gpio_set_pue(GPIO_TypeDef *gpio, uint32_t mask, uint32_t value) +{ + if (__RARELY(gpio == NULL)) { + return -1; + } + mask = gpio->PULLUP_EN & (~mask); + gpio->PULLUP_EN = (mask | value); + return 0; +} + +int32_t gpio_set_ds(GPIO_TypeDef *gpio, uint32_t mask, uint32_t value) +{ + if (__RARELY(gpio == NULL)) { + return -1; + } + mask = gpio->DRIVE & (~mask); + gpio->DRIVE = (mask | value); + return 0; +} + +int32_t gpio_set_outxor(GPIO_TypeDef *gpio, uint32_t mask, uint32_t value) +{ + if (__RARELY(gpio == NULL)) { + return -1; + } + mask = gpio->OUTPUT_XOR & (~mask); + gpio->OUTPUT_XOR = (mask | value); + return 0; +} + +int32_t gpio_enable_interrupt(GPIO_TypeDef *gpio, uint32_t mask, GPIO_INT_TYPE type) +{ + if (__RARELY(gpio == NULL)) { + return -1; + } + switch (type) { + case GPIO_INT_RISE: + gpio->RISE_IE |= mask; + break; + case GPIO_INT_FALL: + gpio->FALL_IE |= mask; + break; + case GPIO_INT_HIGH: + gpio->HIGH_IE |= mask; + break; + case GPIO_INT_LOW: + gpio->LOW_IE |= mask; + break; + default: + break; + } + return 0; +} + +int32_t gpio_disable_interrupt(GPIO_TypeDef *gpio, uint32_t mask, GPIO_INT_TYPE type) +{ + if (__RARELY(gpio == NULL)) { + return -1; + } + switch (type) { + case GPIO_INT_RISE: + gpio->RISE_IE &= ~mask; + break; + case GPIO_INT_FALL: + gpio->FALL_IE &= ~mask; + break; + case GPIO_INT_HIGH: + gpio->HIGH_IE &= ~mask; + break; + case GPIO_INT_LOW: + gpio->LOW_IE &= ~mask; + break; + default: + break; + } + return 0; +} + +int32_t gpio_clear_interrupt(GPIO_TypeDef *gpio, uint32_t mask, GPIO_INT_TYPE type) +{ + if (__RARELY(gpio == NULL)) { + return -1; + } + switch (type) { + case GPIO_INT_RISE: + gpio->RISE_IP |= mask; + break; + case GPIO_INT_FALL: + gpio->FALL_IP |= mask; + break; + case GPIO_INT_HIGH: + gpio->HIGH_IP |= mask; + break; + case GPIO_INT_LOW: + gpio->LOW_IP |= mask; + break; + default: + break; + } + return 0; +} + diff --git a/external/platform/hbird/src/hbird_uart.c b/external/platform/hbird/src/hbird_uart.c new file mode 100644 index 000000000..d2ba68405 --- /dev/null +++ b/external/platform/hbird/src/hbird_uart.c @@ -0,0 +1,105 @@ +#include "hbird.h" +#include "hbird_uart.h" + +int32_t uart_init(UART_TypeDef *uart, uint32_t baudrate) +{ + if (__RARELY(uart == NULL)) { + return -1; + } + uart->DIV = SystemCoreClock / baudrate - 1; + uart->TXCTRL |= UART_TXEN; + uart->RXCTRL |= UART_RXEN; + return 0; +} + +int32_t uart_config_stopbit(UART_TypeDef *uart, UART_STOP_BIT stopbit) +{ + if (__RARELY(uart == NULL)) { + return -1; + } + uint32_t stopval = stopbit; + stopval = (stopbit << UART_TXCTRL_NSTOP_OFS) & UART_TXCTRL_TXCNT_MASK; + uart->TXCTRL &= stopval | (~UART_TXCTRL_TXCNT_MASK); + return 0; +} + +int32_t uart_write(UART_TypeDef *uart, uint8_t val) +{ + if (__RARELY(uart == NULL)) { + return -1; + } +#ifndef SIMULATION_XLSPIKE + while (uart->TXFIFO & UART_TXFIFO_FULL); +#endif + uart->TXFIFO = val; + return 0; +} + +uint8_t uart_read(UART_TypeDef *uart) +{ + uint32_t reg; + if (__RARELY(uart == NULL)) { + return -1; + } + do { + reg = uart->RXFIFO; + } + while (reg & UART_RXFIFO_EMPTY); + return (uint8_t)(reg & 0xFF); +} + +int32_t uart_set_tx_watermark(UART_TypeDef *uart, uint32_t watermark) +{ + if (__RARELY(uart == NULL)) { + return -1; + } + watermark = (watermark << UART_TXCTRL_TXCNT_OFS) & UART_TXCTRL_TXCNT_MASK; + uart->TXCTRL &= watermark | (~UART_TXCTRL_TXCNT_MASK); + return 0; +} + +int32_t uart_enable_txint(UART_TypeDef *uart) +{ + if (__RARELY(uart == NULL)) { + return -1; + } + uart->IE |= UART_IE_TXIE_MASK; + return 0; +} + +int32_t uart_disable_txint(UART_TypeDef *uart) +{ + if (__RARELY(uart == NULL)) { + return -1; + } + uart->IE &= ~UART_IE_TXIE_MASK; + return 0; +} + +int32_t uart_set_rx_watermark(UART_TypeDef *uart, uint32_t watermark) +{ + if (__RARELY(uart == NULL)) { + return -1; + } + watermark = (watermark << UART_RXCTRL_RXCNT_OFS) & UART_RXCTRL_RXCNT_MASK; + uart->RXCTRL &= watermark | (~UART_RXCTRL_RXCNT_MASK); + return 0; +} + +int32_t uart_enable_rxint(UART_TypeDef *uart) +{ + if (__RARELY(uart == NULL)) { + return -1; + } + uart->IE |= UART_IE_RXIE_MASK; + return 0; +} + +int32_t uart_disable_rxint(UART_TypeDef *uart) +{ + if (__RARELY(uart == NULL)) { + return -1; + } + uart->IE &= ~UART_IE_RXIE_MASK; + return 0; +} diff --git a/external/platform/hbird/src/system_hbird.c b/external/platform/hbird/src/system_hbird.c new file mode 100644 index 000000000..8cb5f38a8 --- /dev/null +++ b/external/platform/hbird/src/system_hbird.c @@ -0,0 +1,402 @@ +/* + * Copyright (c) 2009-2018 Arm Limited. All rights reserved. + * Copyright (c) 2019 Nuclei Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/****************************************************************************** + * @file system_hbird.c + * @brief NMSIS Nuclei Core Device Peripheral Access Layer Source File for + * Nuclei HummingBird evaluation SoC which support Nuclei N/NX class cores + * @version V1.00 + * @date 22. Nov 2019 + ******************************************************************************/ +#include +#include +#include "nuclei_sdk_hal.h" + +/*---------------------------------------------------------------------------- + Define clocks + *----------------------------------------------------------------------------*/ +/* ToDo: add here your necessary defines for device initialization + following is an example for different system frequencies */ +#ifndef SYSTEM_CLOCK +#define SYSTEM_CLOCK (80000000UL) +#endif + +/** + * \defgroup NMSIS_Core_SystemConfig System Device Configuration + * \brief Functions for system and clock setup available in system_.c. + * \details + * Nuclei provides a template file **system_Device.c** that must be adapted by + * the silicon vendor to match their actual device. As a minimum requirement, + * this file must provide: + * - A device-specific system configuration function, \ref SystemInit. + * - A global variable that contains the system frequency, \ref SystemCoreClock. + * - A global eclic configuration initialization, \ref ECLIC_Init. + * - Global c library \ref _init and \ref _fini functions called right before calling main function. + * - Vendor customized interrupt, exception and nmi handling code, see \ref NMSIS_Core_IntExcNMI_Handling + * + * The file configures the device and, typically, initializes the oscillator (PLL) that is part + * of the microcontroller device. This file might export other functions or variables that provide + * a more flexible configuration of the microcontroller system. + * + * And this file also provided common interrupt, exception and NMI exception handling framework template, + * Silicon vendor can customize these template code as they want. + * + * \note Please pay special attention to the static variable \c SystemCoreClock. This variable might be + * used throughout the whole system initialization and runtime to calculate frequency/time related values. + * Thus one must assure that the variable always reflects the actual system clock speed. + * + * \attention + * Be aware that a value stored to \c SystemCoreClock during low level initialization (i.e. \c SystemInit()) might get + * overwritten by C libray startup code and/or .bss section initialization. + * Thus its highly recommended to call \ref SystemCoreClockUpdate at the beginning of the user \c main() routine. + * + * @{ + */ + +/*---------------------------------------------------------------------------- + System Core Clock Variable + *----------------------------------------------------------------------------*/ +/* ToDo: initialize SystemCoreClock with the system core clock frequency value + achieved after system intitialization. + This means system core clock frequency after call to SystemInit() */ +/** + * \brief Variable to hold the system core clock value + * \details + * Holds the system core clock, which is the system clock frequency supplied to the SysTick + * timer and the processor core clock. This variable can be used by debuggers to query the + * frequency of the debug timer or to configure the trace clock speed. + * + * \attention + * Compilers must be configured to avoid removing this variable in case the application + * program is not using it. Debugging systems require the variable to be physically + * present in memory so that it can be examined to configure the debugger. + */ +uint32_t SystemCoreClock = SYSTEM_CLOCK; /* System Clock Frequency (Core Clock) */ + +/*---------------------------------------------------------------------------- + Clock functions + *----------------------------------------------------------------------------*/ + +/** + * \brief Function to update the variable \ref SystemCoreClock + * \details + * Updates the variable \ref SystemCoreClock and must be called whenever the core clock is changed + * during program execution. The function evaluates the clock register settings and calculates + * the current core clock. + */ +void SystemCoreClockUpdate (void) /* Get Core Clock Frequency */ +{ + /* ToDo: add code to calculate the system frequency based upon the current + * register settings. + * Note: This function can be used to retrieve the system core clock frequeny + * after user changed register settings. + */ + SystemCoreClock = SYSTEM_CLOCK; +} + +/** + * \brief Function to Initialize the system. + * \details + * Initializes the microcontroller system. Typically, this function configures the + * oscillator (PLL) that is part of the microcontroller device. For systems + * with a variable clock speed, it updates the variable \ref SystemCoreClock. + * SystemInit is called from the file startup_device. + */ +void SystemInit (void) +{ + /* ToDo: add code to initialize the system + * Warn: do not use global variables because this function is called before + * reaching pre-main. RW section maybe overwritten afterwards. + */ + SystemCoreClock = SYSTEM_CLOCK; +} + +/** + * \defgroup NMSIS_Core_IntExcNMI_Handling Interrupt and Exception and NMI Handling + * \brief Functions for interrupt, exception and nmi handle available in system_.c. + * \details + * Nuclei provide a template for interrupt, exception and NMI handling. Silicon Vendor could adapat according + * to their requirement. Silicon vendor could implement interface for different exception code and + * replace current implementation. + * + * @{ + */ +/** \brief Max exception handler number, don't include the NMI(0xFFF) one */ +#define MAX_SYSTEM_EXCEPTION_NUM 12 +/** + * \brief Store the exception handlers for each exception ID + * \note + * - This SystemExceptionHandlers are used to store all the handlers for all + * the exception codes Nuclei N/NX core provided. + * - Exception code 0 - 11, totally 12 exceptions are mapped to SystemExceptionHandlers[0:11] + * - Exception for NMI is also re-routed to exception handling(exception code 0xFFF) in startup code configuration, the handler itself is mapped to SystemExceptionHandlers[MAX_SYSTEM_EXCEPTION_NUM] + */ +static unsigned long SystemExceptionHandlers[MAX_SYSTEM_EXCEPTION_NUM+1]; + +/** + * \brief Exception Handler Function Typedef + * \note + * This typedef is only used internal in this system_.c file. + * It is used to do type conversion for registered exception handler before calling it. + */ +typedef void (*EXC_HANDLER) (unsigned long mcause, unsigned long sp); + +/** + * \brief System Default Exception Handler + * \details + * This function provided a default exception and NMI handling code for all exception ids. + * By default, It will just print some information for debug, Vendor can customize it according to its requirements. + */ +static void system_default_exception_handler(unsigned long mcause, unsigned long sp) +{ + /* TODO: Uncomment this if you have implement printf function */ + printf("MCAUSE : 0x%lx\r\n", mcause); + printf("MDCAUSE: 0x%lx\r\n", __RV_CSR_READ(CSR_MDCAUSE)); + printf("MEPC : 0x%lx\r\n", __RV_CSR_READ(CSR_MEPC)); + printf("MTVAL : 0x%lx\r\n", __RV_CSR_READ(CSR_MBADADDR)); + printf("SP : 0x%lx\r\n", sp); + while(1); +} + +/** + * \brief Initialize all the default core exception handlers + * \details + * The core exception handler for each exception id will be initialized to \ref system_default_exception_handler. + * \note + * Called in \ref _init function, used to initialize default exception handlers for all exception IDs + */ +static void Exception_Init(void) +{ + for (int i = 0; i < MAX_SYSTEM_EXCEPTION_NUM+1; i++) { + SystemExceptionHandlers[i] = (unsigned long)system_default_exception_handler; + } +} + +/** + * \brief Register an exception handler for exception code EXCn + * \details + * * For EXCn < \ref MAX_SYSTEM_EXCEPTION_NUM, it will be registered into SystemExceptionHandlers[EXCn-1]. + * * For EXCn == NMI_EXCn, it will be registered into SystemExceptionHandlers[MAX_SYSTEM_EXCEPTION_NUM]. + * \param EXCn See \ref EXCn_Type + * \param exc_handler The exception handler for this exception code EXCn + */ +void Exception_Register_EXC(uint32_t EXCn, unsigned long exc_handler) +{ + if ((EXCn < MAX_SYSTEM_EXCEPTION_NUM) && (EXCn >= 0)) { + SystemExceptionHandlers[EXCn] = exc_handler; + } else if (EXCn == NMI_EXCn) { + SystemExceptionHandlers[MAX_SYSTEM_EXCEPTION_NUM] = exc_handler; + } +} + +/** + * \brief Get current exception handler for exception code EXCn + * \details + * * For EXCn < \ref MAX_SYSTEM_EXCEPTION_NUM, it will return SystemExceptionHandlers[EXCn-1]. + * * For EXCn == NMI_EXCn, it will return SystemExceptionHandlers[MAX_SYSTEM_EXCEPTION_NUM]. + * \param EXCn See \ref EXCn_Type + * \return Current exception handler for exception code EXCn, if not found, return 0. + */ +unsigned long Exception_Get_EXC(uint32_t EXCn) +{ + if ((EXCn < MAX_SYSTEM_EXCEPTION_NUM) && (EXCn >= 0)) { + return SystemExceptionHandlers[EXCn]; + } else if (EXCn == NMI_EXCn) { + return SystemExceptionHandlers[MAX_SYSTEM_EXCEPTION_NUM]; + } else { + return 0; + } +} + +/** + * \brief Common NMI and Exception handler entry + * \details + * This function provided a command entry for NMI and exception. Silicon Vendor could modify + * this template implementation according to requirement. + * \remarks + * - RISCV provided common entry for all types of exception. This is proposed code template + * for exception entry function, Silicon Vendor could modify the implementation. + * - For the core_exception_handler template, we provided exception register function \ref Exception_Register_EXC + * which can help developer to register your exception handler for specific exception number. + */ +uint32_t core_exception_handler(unsigned long mcause, unsigned long sp) +{ + uint32_t EXCn = (uint32_t)(mcause & 0X00000fff); + EXC_HANDLER exc_handler; + + if ((EXCn < MAX_SYSTEM_EXCEPTION_NUM) && (EXCn >= 0)) { + exc_handler = (EXC_HANDLER)SystemExceptionHandlers[EXCn]; + } else if (EXCn == NMI_EXCn) { + exc_handler = (EXC_HANDLER)SystemExceptionHandlers[MAX_SYSTEM_EXCEPTION_NUM]; + } else { + exc_handler = (EXC_HANDLER)system_default_exception_handler; + } + if (exc_handler != NULL) { + exc_handler(mcause, sp); + } + return 0; +} +/** @} */ /* End of Doxygen Group NMSIS_Core_ExceptionAndNMI */ + +/** Banner Print for Nuclei SDK */ +void SystemBannerPrint(void) +{ +#if defined(NUCLEI_BANNER) && (NUCLEI_BANNER == 1) +#ifndef DOWNLOAD_MODE +#error DOWNLOAD_MODE is not defined via build system, please check! +#endif + const char* download_modes[] = {"FLASHXIP", "FLASH", "ILM", "DDR"}; + printf("Nuclei SDK Build Time: %s, %s\r\n", __DATE__, __TIME__); + printf("Download Mode: %s\r\n", download_modes[DOWNLOAD_MODE]); + printf("CPU Frequency %lu Hz\r\n", SystemCoreClock); +#endif +} + +/** + * \brief initialize eclic config + * \details + * ECLIC needs be initialized after boot up, + * Vendor could also change the initialization + * configuration. + */ +void ECLIC_Init(void) +{ + /* Global Configuration about MTH and NLBits. + * TODO: Please adapt it according to your system requirement. + * This function is called in _init function */ + ECLIC_SetMth(0); + ECLIC_SetCfgNlbits(__ECLIC_INTCTLBITS); +} + +/** + * \brief Initialize a specific IRQ and register the handler + * \details + * This function set vector mode, trigger mode and polarity, interrupt level and priority, + * assign handler for specific IRQn. + * \param [in] IRQn NMI interrupt handler address + * \param [in] shv \ref ECLIC_NON_VECTOR_INTERRUPT means non-vector mode, and \ref ECLIC_VECTOR_INTERRUPT is vector mode + * \param [in] trig_mode see \ref ECLIC_TRIGGER_Type + * \param [in] lvl interupt level + * \param [in] priority interrupt priority + * \param [in] handler interrupt handler, if NULL, handler will not be installed + * \return -1 means invalid input parameter. 0 means successful. + * \remarks + * - This function use to configure specific eclic interrupt and register its interrupt handler and enable its interrupt. + * - If the vector table is placed in read-only section(FLASHXIP mode), handler could not be installed + */ +int32_t ECLIC_Register_IRQ(IRQn_Type IRQn, uint8_t shv, ECLIC_TRIGGER_Type trig_mode, uint8_t lvl, uint8_t priority, void *handler) +{ + if ((IRQn > SOC_INT_MAX) || (shv > ECLIC_VECTOR_INTERRUPT) \ + || (trig_mode > ECLIC_NEGTIVE_EDGE_TRIGGER )) { + return -1; + } + + /* set interrupt vector mode */ + ECLIC_SetShvIRQ(IRQn, shv); + /* set interrupt trigger mode and polarity */ + ECLIC_SetTrigIRQ(IRQn, trig_mode); + /* set interrupt level */ + ECLIC_SetLevelIRQ(IRQn, lvl); + /* set interrupt priority */ + ECLIC_SetPriorityIRQ(IRQn, priority); + if (handler != NULL) { + /* set interrupt handler entry to vector table */ + ECLIC_SetVector(IRQn, (rv_csr_t)handler); + } + /* enable interrupt */ + ECLIC_EnableIRQ(IRQn); + return 0; +} +/** @} */ /* End of Doxygen Group NMSIS_Core_ExceptionAndNMI */ + +/** + * \brief early init function before main + * \details + * This function is executed right before main function. + * For RISC-V gnu toolchain, _init function might not be called + * by __libc_init_array function, so we defined a new function + * to do initialization + */ +void _premain_init(void) +{ + /* TODO: Add your own initialization code here, called before main */ + /* __ICACHE_PRESENT and __DCACHE_PRESENT are defined in hbird.h */ +#if defined(__ICACHE_PRESENT) && __ICACHE_PRESENT == 1 + EnableICache(); +#endif +#if defined(__DCACHE_PRESENT) && __DCACHE_PRESENT == 1 + EnableDCache(); +#endif + SystemCoreClock = get_cpu_freq(); + gpio_iof_config(GPIO, IOF0_UART0_MASK, IOF_SEL_0); + uart_init(SOC_DEBUG_UART, 115200); + /* Display banner after UART initialized */ + SystemBannerPrint(); + /* Initialize exception default handlers */ + Exception_Init(); + /* ECLIC initialization, mainly MTH and NLBIT */ + ECLIC_Init(); +} + +/** + * \brief finish function after main + * \param [in] status status code return from main + * \details + * This function is executed right after main function. + * For RISC-V gnu toolchain, _fini function might not be called + * by __libc_fini_array function, so we defined a new function + * to do initialization + */ +void _postmain_fini(int status) +{ + /* TODO: Add your own finishing code here, called after main */ +#ifdef SIMULATION_XLSPIKE +extern void xlspike_exit(int status); + xlspike_exit(status); +#endif +} + +/** + * \brief _init function called in __libc_init_array() + * \details + * This `__libc_init_array()` function is called during startup code, + * user need to implement this function, otherwise when link it will + * error init.c:(.text.__libc_init_array+0x26): undefined reference to `_init' + * \note + * Please use \ref _premain_init function now + */ +void _init(void) +{ + /* Don't put any code here, please use _premain_init now */ +} + +/** + * \brief _fini function called in __libc_fini_array() + * \details + * This `__libc_fini_array()` function is called when exit main. + * user need to implement this function, otherwise when link it will + * error fini.c:(.text.__libc_fini_array+0x28): undefined reference to `_fini' + * \note + * Please use \ref _postmain_fini function now + */ +void _fini(void) +{ + /* Don't put any code here, please use _postmain_fini now */ +} + +/** @} */ /* End of Doxygen Group NMSIS_Core_SystemAndClock */ diff --git a/platform/nuclei-hbird/platform.c b/platform/nuclei-hbird/platform.c new file mode 100644 index 000000000..357d3934e --- /dev/null +++ b/platform/nuclei-hbird/platform.c @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2015 Travis Geiselbrecht + * + * Use of this source code is governed by a MIT-style + * license that can be found in the LICENSE file or at + * https://opensource.org/licenses/MIT + */ +#include +#include +#include +#include +#include +#include +#include + +#include "platform_p.h" + +void platform_early_init(void) { + sifive_uart_early_init(); +} + +void platform_init(void) { + sifive_uart_init(); +} + +void platform_dputc(char c) { + if (c == '\n') + sifive_uart_write('\r'); + sifive_uart_write(c); +} + +int platform_dgetc(char *c, bool wait) { + return sifive_uart_read(c, wait); +} + + diff --git a/platform/nuclei-hbird/platform_p.h b/platform/nuclei-hbird/platform_p.h new file mode 100644 index 000000000..881817613 --- /dev/null +++ b/platform/nuclei-hbird/platform_p.h @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2015 Travis Geiselbrecht + * + * Use of this source code is governed by a MIT-style + * license that can be found in the LICENSE file or at + * https://opensource.org/licenses/MIT + */ +#pragma once + +#include + +void sifive_uart_write(int c); +int sifive_uart_read(char *c, bool wait); +void sifive_uart_early_init(void); +void sifive_uart_init(void); + diff --git a/platform/nuclei-hbird/rules.mk b/platform/nuclei-hbird/rules.mk new file mode 100644 index 000000000..c2b9a04eb --- /dev/null +++ b/platform/nuclei-hbird/rules.mk @@ -0,0 +1,25 @@ +LOCAL_DIR := $(GET_LOCAL_DIR) + +MODULE := $(LOCAL_DIR) + +ARCH := riscv +SUBARCH ?= 32 +VARIANT ?= nuclei + +MODULE_SRCS += $(LOCAL_DIR)/platform.c +MODULE_SRCS += $(LOCAL_DIR)/uart.c +MODULE_SRCS += $(LOCAL_DIR)/vectab.c + +ROMBASE ?= 0x80000000 # if running from rom, start here +MEMBASE ?= 0x90000000 +MEMSIZE ?= 0x00010000 # default to 1MB + +# uses a two segment layout, select the appropriate linker script +ARCH_RISCV_TWOSEGMENT := 1 +# sets a few options in the riscv arch +ARCH_RISCV_EMBEDDED := 1 + +MODULE_DEPS += platform/hbird \ + lib/cbuf + +include make/module.mk diff --git a/platform/nuclei-hbird/uart.c b/platform/nuclei-hbird/uart.c new file mode 100644 index 000000000..c46fe82ff --- /dev/null +++ b/platform/nuclei-hbird/uart.c @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2018 Travis Geiselbrecht + * + * Use of this source code is governed by a MIT-style + * license that can be found in the LICENSE file or at + * https://opensource.org/licenses/MIT + */ +#include "nuclei_sdk_hal.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "platform_p.h" + +#define LOCAL_TRACE 0 +#define POLLING_RX 1 + +// static volatile unsigned int *const uart_base = (unsigned int *)0; + +#define UART_TXDATA 0 +#define UART_RXDATA 1 +#define UART_TXCTRL 2 +#define UART_RXCTRL 3 +#define UART_IE 4 +#define UART_IP 5 +#define UART_DIV 6 + +#define RXBUF_SIZE 128 +static char uart_rx_buf_data[RXBUF_SIZE]; +static struct cbuf uart_rx_buf; + +void sifive_uart_write(int c) { + uart_write(SOC_DEBUG_UART, c); +} + +int sifive_uart_read(char *c, bool wait) { +#if !POLLING_RX + if (cbuf_read_char(&uart_rx_buf, c, wait) == 1) { + return 0; + } + return -1; +#else + // full polling mode for reads + int i = SOC_DEBUG_UART->RXFIFO; + if (i & (1<<31)) + return -1; + *c = i & 0xff; + return 0; +#endif +} + +static enum handler_return sifive_uart_irq(void *unused) { + LTRACE; + + enum handler_return ret = INT_NO_RESCHEDULE; + for (;;) { + int c = SOC_DEBUG_UART->RXFIFO; + if (c & (1<<31)) + break; // nothing in the fifo + + // stuff this char in the cbuf and try again + cbuf_write_char(&uart_rx_buf, c & 0xff, false); + ret = INT_RESCHEDULE; + } + + return ret; +} + +void sifive_uart_early_init(void) { + // uart_base[UART_DIV] = SOC_FREQ / 115200; + // uart_base[UART_TXCTRL] = 1; // txen + gpio_iof_config(GPIO, IOF0_UART0_MASK, IOF_SEL_0); + uart_init(SOC_DEBUG_UART, 115200); +} + +void sifive_uart_init(void) { + cbuf_initialize_etc(&uart_rx_buf, RXBUF_SIZE, uart_rx_buf_data); + + // uart_enable_rxint(); + // ECLIC_Register_IRQ(); +// uart_base[UART_RXCTRL] = 1; // rxen, rxcnt = 0 +// uart_base[UART_IE] |= (1<<1); // rxwvm + +// #if !POLLING_RX +// unmask_interrupt(SIFIVE_IRQ_UART0); +// #endif +} + diff --git a/platform/nuclei-hbird/vectab.c b/platform/nuclei-hbird/vectab.c new file mode 100644 index 000000000..615f41341 --- /dev/null +++ b/platform/nuclei-hbird/vectab.c @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2012 Travis Geiselbrecht + * + * Use of this source code is governed by a MIT-style + * license that can be found in the LICENSE file or at + * https://opensource.org/licenses/MIT + */ +#include + +#include +#include + +/* un-overridden irq handler */ +void hbird_dummy_irq(void) { + panic("unhandled irq\n"); +} + +/* a list of default handlers that are simply aliases to the dummy handler */ +#define DEFAULT_HANDLER(x) \ +void hbird_irq##x##_handler(void) __WEAK_ALIAS("hbird_dummy_irq"); + +DEFAULT_HANDLER(19); +DEFAULT_HANDLER(20); +DEFAULT_HANDLER(21); +DEFAULT_HANDLER(22); +DEFAULT_HANDLER(23); +DEFAULT_HANDLER(24); +DEFAULT_HANDLER(25); +DEFAULT_HANDLER(26); +DEFAULT_HANDLER(27); +DEFAULT_HANDLER(28); +DEFAULT_HANDLER(29); +DEFAULT_HANDLER(30); +DEFAULT_HANDLER(31); +DEFAULT_HANDLER(32); +DEFAULT_HANDLER(33); +DEFAULT_HANDLER(34); +DEFAULT_HANDLER(35); +DEFAULT_HANDLER(36); +DEFAULT_HANDLER(37); +DEFAULT_HANDLER(38); +DEFAULT_HANDLER(39); +DEFAULT_HANDLER(40); +DEFAULT_HANDLER(41); +DEFAULT_HANDLER(42); +DEFAULT_HANDLER(43); +DEFAULT_HANDLER(44); +DEFAULT_HANDLER(45); +DEFAULT_HANDLER(46); +DEFAULT_HANDLER(47); +DEFAULT_HANDLER(48); +DEFAULT_HANDLER(49); +DEFAULT_HANDLER(50); + +#define VECTAB_ENTRY(x) [SOC_INT##x##_IRQn] = hbird_irq##x##_handler + +/* + * Appended to the end of the nuclei riscv vector table. + * + */ +const void *const __SECTION(".text.boot.vectab2") vectab2[] = { + VECTAB_ENTRY(19), + VECTAB_ENTRY(20), + VECTAB_ENTRY(21), + VECTAB_ENTRY(22), + VECTAB_ENTRY(23), + VECTAB_ENTRY(24), + VECTAB_ENTRY(25), + VECTAB_ENTRY(26), + VECTAB_ENTRY(27), + VECTAB_ENTRY(28), + VECTAB_ENTRY(29), + VECTAB_ENTRY(30), + VECTAB_ENTRY(31), + VECTAB_ENTRY(32), + VECTAB_ENTRY(33), + VECTAB_ENTRY(34), + VECTAB_ENTRY(35), + VECTAB_ENTRY(36), + VECTAB_ENTRY(37), + VECTAB_ENTRY(38), + VECTAB_ENTRY(39), + VECTAB_ENTRY(40), + VECTAB_ENTRY(41), + VECTAB_ENTRY(42), + VECTAB_ENTRY(43), + VECTAB_ENTRY(44), + VECTAB_ENTRY(45), + VECTAB_ENTRY(46), + VECTAB_ENTRY(47), + VECTAB_ENTRY(48), + VECTAB_ENTRY(49), + VECTAB_ENTRY(50), +}; diff --git a/project/nuclei-hbird.mk b/project/nuclei-hbird.mk new file mode 100644 index 000000000..76ac12fda --- /dev/null +++ b/project/nuclei-hbird.mk @@ -0,0 +1,8 @@ +# main project for qemu-riscv32 +MODULES += \ + app/shell \ + app/tests +SUBARCH := 32 + +include project/target/nuclei-hbird.mk + diff --git a/project/target/nuclei-hbird.mk b/project/target/nuclei-hbird.mk new file mode 100644 index 000000000..b6d5ecdce --- /dev/null +++ b/project/target/nuclei-hbird.mk @@ -0,0 +1,2 @@ +TARGET := nuclei-hbird + diff --git a/target/nuclei-hbird/include/board_hbird_eval.h b/target/nuclei-hbird/include/board_hbird_eval.h new file mode 100644 index 000000000..0c3840e84 --- /dev/null +++ b/target/nuclei-hbird/include/board_hbird_eval.h @@ -0,0 +1,37 @@ +// See LICENSE for license details. +#ifndef _BOARD_HBIRD_EVAL_H_ +#define _BOARD_HBIRD_EVAL_H_ + +#ifdef __cplusplus + extern "C" { +#endif + +#include "nuclei_sdk_soc.h" + + +// Interrupt Numbers +#define SOC_BUTTON_1_IRQn SOC_INT49_IRQn +#define SOC_BUTTON_2_IRQn SOC_INT50_IRQn +// Interrupt Handler Definitions +#define SOC_BUTTON_1_HANDLER eclic_irq49_handler +#define SOC_BUTTON_2_HANDLER eclic_irq50_handler +// GPIO Bit Offset +#define SOC_LED_RED_GPIO_OFS 19 +#define SOC_LED_GREEN_GPIO_OFS 21 +#define SOC_LED_BLUE_GPIO_OFS 22 +#define SOC_BUTTON_1_GPIO_OFS 30 +#define SOC_BUTTON_2_GPIO_OFS 31 + +// GPIO Bit Mask +#define SOC_LED_RED_GPIO_MASK (1< JTAG-DEBUGGER" is for HummingBird Debugger V2 +## Uncomment one which match your device description +# ftdi_device_desc "Dual RS232-HS" +# ftdi_device_desc "USB <-> JTAG-DEBUGGER" + +transport select jtag + +ftdi_layout_init 0x0008 0x001b +ftdi_layout_signal nSRST -oe 0x0020 -data 0x0020 +ftdi_layout_signal TCK -data 0x0001 +ftdi_layout_signal TDI -data 0x0002 +ftdi_layout_signal TDO -input 0x0004 +ftdi_layout_signal TMS -data 0x0008 +ftdi_layout_signal JTAG_SEL -data 0x0100 -oe 0x0100 + +set _CHIPNAME riscv +jtag newtap $_CHIPNAME cpu -irlen 5 + +set _TARGETNAME $_CHIPNAME.cpu +target create $_TARGETNAME riscv -chain-position $_TARGETNAME +$_TARGETNAME configure -work-area-phys 0x80000000 -work-area-size 10000 -work-area-backup 1 + +set _FLASHNAME $_CHIPNAME.flash +flash bank $_FLASHNAME fespi 0x20000000 0 0 0 $_TARGETNAME +# Set the ILM space also as flash, to make sure it can be add breakpoint with hardware trigger +#flash bank onboard_ilm fespi 0x80000000 0 0 0 $_TARGETNAME + +# Expose Nuclei self-defined CSRS +# See https://github.com/riscv/riscv-gnu-toolchain/issues/319#issuecomment-358397306 +# Then user can view the csr register value in gdb using: info reg csr775 for CSR MTVT(0x307) +riscv expose_csrs 416-496,770-800,835-850,1227-1231,1483-1486,1984-2032,2064-2070,2370-2380,2490-2500,4032-4040 + +init + +if {[ info exists pulse_srst]} { + ftdi_set_signal nSRST 0 + ftdi_set_signal nSRST z +} +halt +# We must turn on this because otherwise the IDE version debug cannot download the program into flash +flash protect 0 0 last off diff --git a/target/nuclei-hbird/rules.mk b/target/nuclei-hbird/rules.mk new file mode 100644 index 000000000..aaa5a401e --- /dev/null +++ b/target/nuclei-hbird/rules.mk @@ -0,0 +1,34 @@ +LOCAL_DIR := $(GET_LOCAL_DIR) + +MODULE := $(LOCAL_DIR) + +PLATFORM := nuclei-hbird +WITH_LINKER_GC ?= 1 + +MEMSIZE ?= 0x10000 # 64KB +GLOBAL_DEFINES += TARGET_HAS_DEBUG_LED=1 + +# target code will set the master frequency to 16Mhz +GLOBAL_DEFINES += SOC_FREQ=32000000 DOWNLOAD_MODE=DOWNLOAD_MODE_ILM + +MODULE_SRCS := $(LOCAL_DIR)/target.c + +# set some global defines based on capability +GLOBAL_DEFINES += CONSOLE_ENABLE_HISTORY=0 +GLOBAL_DEFINES += PLATFORM_HAS_DYNAMIC_TIMER=1 +GLOBAL_DEFINES += ARCH_RISCV_CLINT_BASE=0x02000000 +GLOBAL_DEFINES += ARCH_RISCV_MTIME_RATE=32768 + +OPENOCD_CFG := $(LOCAL_DIR)/openocd_hbird.cfg + +OPENOCD_ARGS += -f $(OPENOCD_CFG) + +GDB_UPLOAD_ARGS ?= --batch +GDB_UPLOAD_CMDS += -ex "monitor halt" +GDB_UPLOAD_CMDS += -ex "monitor flash protect 0 0 last off" +GDB_UPLOAD_CMDS += -ex "load" +GDB_UPLOAD_CMDS += -ex "monitor resume" +GDB_UPLOAD_CMDS += -ex "quit" + +include make/module.mk + diff --git a/target/nuclei-hbird/target.c b/target/nuclei-hbird/target.c new file mode 100644 index 000000000..522b9d055 --- /dev/null +++ b/target/nuclei-hbird/target.c @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2019 Travis Geiselbrecht + * + * Use of this source code is governed by a MIT-style + * license that can be found in the LICENSE file or at + * https://opensource.org/licenses/MIT + */ + +#include +#include +#include + +void target_early_init(void) { + +} + +void target_set_debug_led(unsigned int led, bool on) { + +} + +void target_init(void) { +} + + From 2e2678a208030aa967f776cdfc61ea565f9dadfb Mon Sep 17 00:00:00 2001 From: Huaqi Fang <578567190@qq.com> Date: Wed, 28 Oct 2020 13:05:58 +0800 Subject: [PATCH 2/4] [arch][riscv] remove debug message in idle Signed-off-by: Huaqi Fang <578567190@qq.com> --- arch/riscv/arch.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/riscv/arch.c b/arch/riscv/arch.c index 913ff2ec1..67892a029 100644 --- a/arch/riscv/arch.c +++ b/arch/riscv/arch.c @@ -143,7 +143,6 @@ void arch_init(void) { void arch_idle(void) { // let the platform/target disable wfi #if !RISCV_DISABLE_WFI - printf("Idle\n"); __asm__ volatile("wfi"); #endif } From 3972af29f95e24010cbe8a7512afa6360cd7cec5 Mon Sep 17 00:00:00 2001 From: Huaqi Fang <578567190@qq.com> Date: Wed, 28 Oct 2020 13:11:21 +0800 Subject: [PATCH 3/4] [ci][travis] Add travis ci build for nuclei-hbird Signed-off-by: Huaqi Fang <578567190@qq.com> --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 758f5d54a..7bea976a7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,6 +22,7 @@ env: - PROJECT=qemu-virt-riscv64-supervisor-test TOOLCHAIN=riscv64-elf-7.5.0-Linux-x86_64 - PROJECT=sifive-e-test TOOLCHAIN=riscv32-elf-7.5.0-Linux-x86_64 - PROJECT=sifive-unleashed-test TOOLCHAIN=riscv64-elf-7.5.0-Linux-x86_64 + - PROJECT=nuclei-hbird TOOLCHAIN=riscv32-elf-7.5.0-Linux-x86_64 - PROJECT=pc-x86-test TOOLCHAIN=i386-elf-7.5.0-Linux-x86_64 - PROJECT=pc-x86-64-test TOOLCHAIN=x86_64-elf-7.5.0-Linux-x86_64 From 52c107216c89999ab5a50f4987f6aef43bdf632a Mon Sep 17 00:00:00 2001 From: Huaqi Fang <578567190@qq.com> Date: Mon, 2 Nov 2020 15:03:57 +0800 Subject: [PATCH 4/4] arch: remove libg++ dependency check Signed-off-by: Huaqi Fang <578567190@qq.com> --- arch/riscv/rules.mk | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/riscv/rules.mk b/arch/riscv/rules.mk index 3f93eebde..f755c0a1a 100644 --- a/arch/riscv/rules.mk +++ b/arch/riscv/rules.mk @@ -166,7 +166,6 @@ WITH_LINKER_GC ?= 0 endif LIBGCC := $(shell $(TOOLCHAIN_PREFIX)gcc $(GLOBAL_COMPILEFLAGS) $(ARCH_COMPILEFLAGS) $(GLOBAL_CFLAGS) -print-libgcc-file-name) -LIBGCC += $(shell $(TOOLCHAIN_PREFIX)gcc $(GLOBAL_COMPILEFLAGS) $(ARCH_COMPILEFLAGS) $(GLOBAL_CFLAGS) -print-file-name=libstdc++.a) $(info LIBGCC = $(LIBGCC)) # potentially generated files that should be cleaned out with clean make rule