Skip to content

Commit 3214f30

Browse files
authored
[PMP] add support for NA4 and NAPOT modes (#566)
2 parents a3eb585 + a8e54c8 commit 3214f30

File tree

11 files changed

+354
-391
lines changed

11 files changed

+354
-391
lines changed

CHANGELOG.md

+1
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@ mimpid = 0x01040312 => Version 01.04.03.12 => v1.4.3.12
3131

3232
| Date (*dd.mm.yyyy*) | Version | Comment |
3333
|:-------------------:|:-------:|:--------|
34+
| 01.04.2023 | 1.8.3.1 | :sparkles: add full `NA4` and `NAPOT` support to the (now) RISC-V-compatible **physical memory protection (PMP)**; [#566](https://github.com/stnolting/neorv32/pull/566) |
3435
| 31.03.2023 | [**:rocket:1.8.3**](https://github.com/stnolting/neorv32/releases/tag/v1.8.3) | **New release** |
3536
| 29.03.2023 | 1.8.2.9 | :warning: remove `CPU_EXTENSION_RISCV_Zicsr` generic - `Zicsr` ISA extension is always enabled; optimize bus switch; VHDL code cleanups; [#562](https://github.com/stnolting/neorv32/pull/562) |
3637
| 25.03.2023 | 1.8.2.8 | :test_tube: add configurable data cache (**dCACHE**); [#560](https://github.com/stnolting/neorv32/pull/560) |

docs/datasheet/cpu.adoc

+4-9
Original file line number | Diff line number | Diff line change
@@ -35,11 +35,6 @@ instruction exception (-> <<_full_virtualization>>).
3535

3636
**Incompatibility Issues and Limitations**
3737

38-
.Physical Memory Protection (PMP)
39-
[WARNING]
40-
The RISC-V-compatible NEORV32 <<_machine_physical_memory_protection_csrs>> only implements the **TOR**
41-
(top of region) mode and only up to 16 PMP regions.
42-
4338
.No Hardware Support of Misaligned Memory Accesses
4439
[IMPORTANT]
4540
The CPU does not support resolving unaligned memory access by the hardware (this is not a
@@ -561,14 +556,14 @@ to the RISC-V Privileged Architecture Specifications. In general, the PMP can **
561556
which by default has none, and can **revoke permissions from M-mode**, which by default has full permissions.
562557
The PMP is configured via the <<_machine_physical_memory_protection_csrs>>.
563558

564-
[IMPORTANT]
565-
The NEORV32 PMP only supports **TOR** (top of region) mode, which basically is a "base-and-bound" concept, and only
566-
up to 16 PMP regions.
567-
568559
.PMP Rules when in Debug Mode
569560
[NOTE]
570561
When in debug-mode all PMP rules are ignored making the debugger have maximum access rights.
571562

563+
[IMPORTANT]
564+
Instruction fetches are still issued even when they are denied by a PMP rule. However, the fetched instruction(s)
565+
will not be executed and will not change the CPU core state, so memory access protection is preserved.
566+
572567

573568
==== `Sdext` ISA Extension
574569

docs/datasheet/cpu_csr.adoc

+83-86
Large diffs are not rendered by default.

rtl/core/neorv32_cpu_bus.vhd

+104-100
Original file line number | Diff line number | Diff line change
@@ -92,8 +92,9 @@ architecture neorv32_cpu_bus_rtl of neorv32_cpu_bus is
9292
constant pmp_cfg_ah_c : natural := 4; -- mode bit high
9393
constant pmp_cfg_l_c : natural := 7; -- locked entry
9494

95-
-- PMP minimal granularity --
96-
constant pmp_lsb_c : natural := index_size_f(PMP_MIN_GRANULARITY); -- min = 2
95+
-- PMP helpers --
96+
constant pmp_lsb_c : natural := index_size_f(PMP_MIN_GRANULARITY); -- min = 2
97+
constant pmp_zero_c : std_ulogic_vector(XLEN-1 downto pmp_lsb_c) := (others => '0');
9798

9899
-- misc --
99100
signal data_sign : std_ulogic; -- signed load
@@ -110,21 +111,28 @@ architecture neorv32_cpu_bus_rtl of neorv32_cpu_bus is
110111
signal arbiter : bus_arbiter_t;
111112

112113
-- physical memory protection --
114+
type pmp_mask_t is array (0 to PMP_NUM_REGIONS-1) of std_ulogic_vector(XLEN-1 downto pmp_lsb_c);
113115
type pmp_t is record
116+
i_cmp_mm : std_ulogic_vector(PMP_NUM_REGIONS-1 downto 0);
114117
i_cmp_ge : std_ulogic_vector(PMP_NUM_REGIONS-1 downto 0);
115118
i_cmp_lt : std_ulogic_vector(PMP_NUM_REGIONS-1 downto 0);
119+
d_cmp_mm : std_ulogic_vector(PMP_NUM_REGIONS-1 downto 0);
116120
d_cmp_ge : std_ulogic_vector(PMP_NUM_REGIONS-1 downto 0);
117121
d_cmp_lt : std_ulogic_vector(PMP_NUM_REGIONS-1 downto 0);
118122
i_match : std_ulogic_vector(PMP_NUM_REGIONS-1 downto 0);
119123
d_match : std_ulogic_vector(PMP_NUM_REGIONS-1 downto 0);
120124
perm_ex : std_ulogic_vector(PMP_NUM_REGIONS-1 downto 0);
121125
perm_rd : std_ulogic_vector(PMP_NUM_REGIONS-1 downto 0);
122126
perm_wr : std_ulogic_vector(PMP_NUM_REGIONS-1 downto 0);
127+
fail_ex : std_ulogic_vector(PMP_NUM_REGIONS downto 0);
128+
fail_rd : std_ulogic_vector(PMP_NUM_REGIONS downto 0);
129+
fail_wr : std_ulogic_vector(PMP_NUM_REGIONS downto 0);
123130
if_fault : std_ulogic;
124131
ld_fault : std_ulogic;
125132
st_fault : std_ulogic;
126133
end record;
127-
signal pmp : pmp_t;
134+
signal pmp_mask : pmp_mask_t;
135+
signal pmp : pmp_t;
128136

129137
-- pmp faults --
130138
signal if_pmp_fault : std_ulogic; -- pmp instruction access fault
@@ -380,8 +388,7 @@ begin
380388
arbiter.pmp_r_err <= '0';
381389
arbiter.pmp_w_err <= '0';
382390
elsif rising_edge(clk_i) then
383-
arbiter.pmp_r_err <= ld_pmp_fault;
384-
arbiter.pmp_w_err <= st_pmp_fault;
391+
-- arbiter --
385392
if (arbiter.pend = '0') then -- idle
386393
if (ctrl_i.bus_req = '1') then -- start bus access
387394
arbiter.pend <= '1';
@@ -399,6 +406,11 @@ begin
399406
arbiter.pend <= '0';
400407
end if;
401408
end if;
409+
-- PMP error --
410+
if (ctrl_i.bus_mo_we = '1') then -- sample PMP errors only once
411+
arbiter.pmp_r_err <= ld_pmp_fault;
412+
arbiter.pmp_w_err <= st_pmp_fault;
413+
end if;
402414
end if;
403415
end process data_access_arbiter;
404416

@@ -421,111 +433,103 @@ begin
421433
-- RISC-V Physical Memory Protection (PMP) ------------------------------------------------
422434
-- -------------------------------------------------------------------------------------------
423435

424-
-- check address --
425-
pmp_check_address: process(fetch_pc_i, addr_i, pmp_addr_i)
426-
begin
427-
for r in 0 to PMP_NUM_REGIONS-1 loop
428-
if (r = 0) then -- first entry: use ZERO as base and current entry as bound
429-
pmp.i_cmp_ge(r) <= '1'; -- address is always greater than or equal to zero
430-
pmp.i_cmp_lt(r) <= '0'; -- unused
431-
pmp.d_cmp_ge(r) <= '1'; -- address is always greater than or equal to zero
432-
pmp.d_cmp_lt(r) <= '0'; -- unused
433-
else -- use previous entry as base and current entry as bound
434-
pmp.i_cmp_ge(r) <= bool_to_ulogic_f(unsigned(fetch_pc_i(XLEN-1 downto pmp_lsb_c)) >= unsigned(pmp_addr_i(r-1)(XLEN-1 downto pmp_lsb_c)));
435-
pmp.i_cmp_lt(r) <= bool_to_ulogic_f(unsigned(fetch_pc_i(XLEN-1 downto pmp_lsb_c)) < unsigned(pmp_addr_i(r-0)(XLEN-1 downto pmp_lsb_c)));
436-
pmp.d_cmp_ge(r) <= bool_to_ulogic_f(unsigned( addr_i(XLEN-1 downto pmp_lsb_c)) >= unsigned(pmp_addr_i(r-1)(XLEN-1 downto pmp_lsb_c)));
437-
pmp.d_cmp_lt(r) <= bool_to_ulogic_f(unsigned( addr_i(XLEN-1 downto pmp_lsb_c)) < unsigned(pmp_addr_i(r-0)(XLEN-1 downto pmp_lsb_c)));
438-
end if;
439-
end loop; -- r
440-
end process pmp_check_address;
441-
442-
443-
-- check mode --
444-
pmp_check_mode: process(pmp_ctrl_i, pmp)
445-
begin
446-
for r in 0 to PMP_NUM_REGIONS-1 loop
447-
if (pmp_ctrl_i(r)(pmp_cfg_ah_c downto pmp_cfg_al_c) = pmp_mode_tor_c) then -- TOR mode
448-
if (r < (PMP_NUM_REGIONS-1)) then
449-
-- this saves a LOT of comparators --
450-
pmp.i_match(r) <= pmp.i_cmp_ge(r) and (not pmp.i_cmp_ge(r+1));
451-
pmp.d_match(r) <= pmp.d_cmp_ge(r) and (not pmp.d_cmp_ge(r+1));
452-
else -- very last entry
453-
pmp.i_match(r) <= pmp.i_cmp_ge(r) and pmp.i_cmp_lt(r);
454-
pmp.d_match(r) <= pmp.d_cmp_ge(r) and pmp.d_cmp_lt(r);
436+
-- compute address masks for NAPOT modes (iterative!) --
437+
pmp_masking_gen:
438+
for r in 0 to PMP_NUM_REGIONS-1 generate
439+
pmp_masking: process(rstn_i, clk_i)
440+
begin
441+
if (rstn_i = '0') then
442+
pmp_mask(r) <= (others => '0');
443+
elsif rising_edge(clk_i) then -- address mask computation has a latency of max 32 cycles
444+
if (pmp_ctrl_i(r)(pmp_cfg_al_c) = '1') then -- NAPOT (or TOR, but that's irrelevant here)
445+
pmp_mask(r)(pmp_lsb_c) <= '0';
446+
for i in pmp_lsb_c+1 to XLEN-1 loop
447+
pmp_mask(r)(i) <= pmp_mask(r)(i-1) or (not pmp_addr_i(r)(i-1)); -- skip address byte offset
448+
end loop; -- i
449+
else -- NA4
450+
pmp_mask(r) <= (others => '1');
455451
end if;
456-
else -- entry disabled
457-
pmp.i_match(r) <= '0';
458-
pmp.d_match(r) <= '0';
459452
end if;
460-
end loop; -- r
461-
end process pmp_check_mode;
453+
end process pmp_masking;
454+
end generate;
462455

463456

464-
-- check permission --
465-
pmp_check_permission: process(ctrl_i, pmp_ctrl_i)
466-
begin
467-
for r in 0 to PMP_NUM_REGIONS-1 loop
457+
-- check address --
458+
pmp_check_address:
459+
for r in 0 to PMP_NUM_REGIONS-1 generate
460+
-- NA4 and NAPOT --
461+
pmp.i_cmp_mm(r) <= '1' when ((fetch_pc_i(XLEN-1 downto pmp_lsb_c) and pmp_mask(r)) = (pmp_addr_i(r)(XLEN-1 downto pmp_lsb_c) and pmp_mask(r))) else '0';
462+
pmp.d_cmp_mm(r) <= '1' when (( addr_i(XLEN-1 downto pmp_lsb_c) and pmp_mask(r)) = (pmp_addr_i(r)(XLEN-1 downto pmp_lsb_c) and pmp_mask(r))) else '0';
463+
-- TOR region 0 --
464+
pmp_check_address_r0:
465+
if (r = 0) generate -- first entry: use ZERO as base and current entry as bound
466+
pmp.i_cmp_ge(r) <= '1'; -- address is always greater than or equal to zero
467+
pmp.i_cmp_lt(r) <= '0'; -- unused
468+
pmp.d_cmp_ge(r) <= '1'; -- address is always greater than or equal to zero
469+
pmp.d_cmp_lt(r) <= '0'; -- unused
470+
end generate;
471+
-- TOR region any --
472+
pmp_check_address_rany:
473+
if (r > 0) generate -- use previous entry as base and current entry as bound
474+
pmp.i_cmp_ge(r) <= '1' when (unsigned(fetch_pc_i(XLEN-1 downto pmp_lsb_c)) >= unsigned(pmp_addr_i(r-1)(XLEN-1 downto pmp_lsb_c))) else '0';
475+
pmp.i_cmp_lt(r) <= '1' when (unsigned(fetch_pc_i(XLEN-1 downto pmp_lsb_c)) < unsigned(pmp_addr_i(r )(XLEN-1 downto pmp_lsb_c))) else '0';
476+
pmp.d_cmp_ge(r) <= '1' when (unsigned( addr_i(XLEN-1 downto pmp_lsb_c)) >= unsigned(pmp_addr_i(r-1)(XLEN-1 downto pmp_lsb_c))) else '0';
477+
pmp.d_cmp_lt(r) <= '1' when (unsigned( addr_i(XLEN-1 downto pmp_lsb_c)) < unsigned(pmp_addr_i(r )(XLEN-1 downto pmp_lsb_c))) else '0';
478+
end generate;
479+
end generate;
468480

469-
-- instruction fetch access --
470-
if (ctrl_i.cpu_priv = priv_mode_m_c) then -- M mode: always allow if lock bit not set, otherwise check permission
471-
pmp.perm_ex(r) <= (not pmp_ctrl_i(r)(pmp_cfg_l_c)) or pmp_ctrl_i(r)(pmp_cfg_x_c);
472-
else -- U mode: always check permission
473-
pmp.perm_ex(r) <= pmp_ctrl_i(r)(pmp_cfg_x_c);
474-
end if;
475481

476-
-- load/store accesses from M mod (can also use U mode's permissions if MSTATUS.MPRV is set) --
477-
if (ctrl_i.bus_priv = priv_mode_m_c) then -- M mode: always allow if lock bit not set, otherwise check permission
478-
pmp.perm_rd(r) <= (not pmp_ctrl_i(r)(pmp_cfg_l_c)) or pmp_ctrl_i(r)(pmp_cfg_r_c);
479-
pmp.perm_wr(r) <= (not pmp_ctrl_i(r)(pmp_cfg_l_c)) or pmp_ctrl_i(r)(pmp_cfg_w_c);
480-
else -- U mode: always check permission
481-
pmp.perm_rd(r) <= pmp_ctrl_i(r)(pmp_cfg_r_c);
482-
pmp.perm_wr(r) <= pmp_ctrl_i(r)(pmp_cfg_w_c);
483-
end if;
482+
-- check mode --
483+
pmp_check_mode_gen:
484+
for r in 0 to PMP_NUM_REGIONS-1 generate
485+
pmp_check_mode: process(pmp_ctrl_i, pmp)
486+
begin
487+
case pmp_ctrl_i(r)(pmp_cfg_ah_c downto pmp_cfg_al_c) is
488+
when pmp_mode_off_c => -- entry disabled
489+
pmp.i_match(r) <= '0';
490+
pmp.d_match(r) <= '0';
491+
when pmp_mode_tor_c => -- top of region
492+
if (r = (PMP_NUM_REGIONS-1)) then -- very last entry
493+
pmp.i_match(r) <= pmp.i_cmp_ge(r) and pmp.i_cmp_lt(r);
494+
pmp.d_match(r) <= pmp.d_cmp_ge(r) and pmp.d_cmp_lt(r);
495+
else -- this saves a LOT of comparators
496+
pmp.i_match(r) <= pmp.i_cmp_ge(r) and (not pmp.i_cmp_ge(r+1));
497+
pmp.d_match(r) <= pmp.d_cmp_ge(r) and (not pmp.d_cmp_ge(r+1));
498+
end if;
499+
when others => -- naturally-aligned region
500+
pmp.i_match(r) <= pmp.i_cmp_mm(r);
501+
pmp.d_match(r) <= pmp.d_cmp_mm(r);
502+
end case;
503+
end process pmp_check_mode;
504+
end generate;
484505

485-
end loop; -- r
486-
end process pmp_check_permission;
506+
507+
-- check permission --
508+
-- M mode: always allow if lock bit not set, otherwise check permission
509+
pmp_check_permission:
510+
for r in 0 to PMP_NUM_REGIONS-1 generate
511+
pmp.perm_ex(r) <= pmp_ctrl_i(r)(pmp_cfg_x_c) or (not pmp_ctrl_i(r)(pmp_cfg_l_c)) when (ctrl_i.cpu_priv = priv_mode_m_c) else pmp_ctrl_i(r)(pmp_cfg_x_c);
512+
pmp.perm_rd(r) <= pmp_ctrl_i(r)(pmp_cfg_r_c) or (not pmp_ctrl_i(r)(pmp_cfg_l_c)) when (ctrl_i.bus_priv = priv_mode_m_c) else pmp_ctrl_i(r)(pmp_cfg_r_c);
513+
pmp.perm_wr(r) <= pmp_ctrl_i(r)(pmp_cfg_w_c) or (not pmp_ctrl_i(r)(pmp_cfg_l_c)) when (ctrl_i.bus_priv = priv_mode_m_c) else pmp_ctrl_i(r)(pmp_cfg_w_c);
514+
end generate;
487515

488516

489517
-- check for access fault (using static prioritization) --
490-
pmp_check_fault: process(ctrl_i, pmp)
491-
variable tmp_if_v, tmp_ld_v, tmp_st_v : std_ulogic_vector(PMP_NUM_REGIONS downto 0);
492-
begin
493-
-- > This is a *structural* description of a prioritization logic (a multiplexer chain).
494-
-- > I prefer this style as I do not like using a loop with 'exit' - and I also think this style might be smaller
495-
-- > and faster (could use the carry chain?!) as the synthesizer has less freedom doing what *I* want. ;)
496-
tmp_if_v(PMP_NUM_REGIONS) := bool_to_ulogic_f(ctrl_i.cpu_priv /= priv_mode_m_c); -- default: fault if U mode
497-
tmp_ld_v(PMP_NUM_REGIONS) := bool_to_ulogic_f(ctrl_i.bus_priv /= priv_mode_m_c); -- default: fault if U mode
498-
tmp_st_v(PMP_NUM_REGIONS) := bool_to_ulogic_f(ctrl_i.bus_priv /= priv_mode_m_c); -- default: fault if U mode
499-
500-
for r in PMP_NUM_REGIONS-1 downto 0 loop -- start with lowest priority
501-
-- instruction fetch access --
502-
if (pmp.i_match(r) = '1') then -- address matches region r
503-
tmp_if_v(r) := not pmp.perm_ex(r); -- fault if no execute permission
504-
else
505-
tmp_if_v(r) := tmp_if_v(r+1);
506-
end if;
507-
-- data load/store access --
508-
if (pmp.d_match(r) = '1') then -- address matches region r
509-
tmp_ld_v(r) := not pmp.perm_rd(r); -- fault if no read permission
510-
tmp_st_v(r) := not pmp.perm_wr(r); -- fault if no write permission
511-
else
512-
tmp_ld_v(r) := tmp_ld_v(r+1);
513-
tmp_st_v(r) := tmp_st_v(r+1);
514-
end if;
515-
end loop; -- r
516-
pmp.if_fault <= tmp_if_v(0);
517-
pmp.ld_fault <= tmp_ld_v(0);
518-
pmp.st_fault <= tmp_st_v(0);
519-
520-
-- > this is the behavioral version of the code above (instruction fetch access)
521-
-- pmp.if_fault <= bool_to_ulogic_f(ctrl_i.cpu_priv /= priv_mode_m_c); -- default: fault if U mode
522-
-- for r in 0 to PMP_NUM_REGIONS-1 loop
523-
-- if (pmp.i_match(r) = '1') then
524-
-- pmp.if_fault <= not pmp.perm_ex(r); -- fault if no execute permission
525-
-- exit;
526-
-- end if;
527-
-- end loop; -- r
528-
end process pmp_check_fault;
518+
-- default: fault if not M-mode --
519+
pmp.fail_ex(PMP_NUM_REGIONS) <= '1' when (ctrl_i.cpu_priv /= priv_mode_m_c) else '0';
520+
pmp.fail_rd(PMP_NUM_REGIONS) <= '1' when (ctrl_i.bus_priv /= priv_mode_m_c) else '0';
521+
pmp.fail_wr(PMP_NUM_REGIONS) <= '1' when (ctrl_i.bus_priv /= priv_mode_m_c) else '0';
522+
-- This is a *structural* description of a prioritization logic implemented as a multiplexer chain. --
523+
pmp_chech_fault:
524+
for r in PMP_NUM_REGIONS-1 downto 0 generate -- start with lowest priority
525+
pmp.fail_ex(r) <= not pmp.perm_ex(r) when (pmp.i_match(r) = '1') else pmp.fail_ex(r+1);
526+
pmp.fail_rd(r) <= not pmp.perm_rd(r) when (pmp.d_match(r) = '1') else pmp.fail_rd(r+1);
527+
pmp.fail_wr(r) <= not pmp.perm_wr(r) when (pmp.d_match(r) = '1') else pmp.fail_wr(r+1);
528+
end generate;
529+
pmp.if_fault <= pmp.fail_ex(0);
530+
pmp.ld_fault <= pmp.fail_rd(0);
531+
pmp.st_fault <= pmp.fail_wr(0);
532+
529533

530534
-- final PMP access fault signals (ignored when in debug mode) --
531535
if_pmp_fault <= '1' when (pmp.if_fault = '1') and (PMP_NUM_REGIONS > 0) and (ctrl_i.cpu_debug = '0') else '0';

0 commit comments

Comments
 (0)