18
18
*/
19
19
20
20
/*
21
- * This exception handler, allows for byte or short accesses to iRAM or PROGMEM
22
- * to succeed without causing a crash. It is still preferred to use the xxx_P
23
- * macros whenever possible, since they are probably 30x faster than this
24
- * exception handler method.
21
+ * This exception handler handles EXCCAUSE_LOAD_STORE_ERROR. It allows for a
22
+ * byte or short access to iRAM or PROGMEM to succeed without causing a crash.
23
+ * When reading, it is still preferred to use the xxx_P macros when possible
24
+ * since they are probably 30x faster than this exception handler method.
25
25
*
26
26
* Code taken directly from @pvvx's public domain code in
27
27
* https://github.com/pvvx/esp8266web/blob/master/app/sdklib/system/app_main.c
37
37
#include < Schedule.h>
38
38
#include < debug.h>
39
39
40
+ // All of these optimizations were tried and now work
41
+ // These results were from irammem.ino using GCC 10.2
42
+ // DRAM reference uint16 9 AVG cycles/transfer
43
+ // #pragma GCC optimize("O0") // uint16, 289 AVG cycles/transfer, IRAM: +180
44
+ // #pragma GCC optimize("O1") // uint16, 241 AVG cycles/transfer, IRAM: +16
45
+ #pragma GCC optimize("O2") // uint16, 230 AVG cycles/transfer, IRAM: +4
46
+ // #pragma GCC optimize("O3") // uint16, 230 AVG cycles/transfer, IRAM: +4
47
+ // #pragma GCC optimize("Ofast") // uint16, 230 AVG cycles/transfer, IRAM: +4
48
+ // #pragma GCC optimize("Os") // uint16, 233 AVG cycles/transfer, IRAM: 27556 +0
49
+
40
50
extern " C" {
41
51
42
52
#define LOAD_MASK 0x00f00fu
@@ -50,32 +60,55 @@ extern "C" {
50
60
51
61
static fn_c_exception_handler_t old_c_handler = NULL ;
52
62
53
- static IRAM_ATTR void non32xfer_exception_handler (struct __exception_frame *ef, int cause)
63
+ static
64
+ IRAM_ATTR void non32xfer_exception_handler (struct __exception_frame *ef, int cause)
54
65
{
55
66
do {
56
67
/*
57
- Had to split out some of the asm, compiler was reusing a register that it
58
- needed later. A crash would come or go away with the slightest unrelated
59
- changes elsewhere in the function.
60
-
61
- Register a15 was used for epc1, then clobbered for rsr. Maybe an
62
- __asm("":::"memory") before starting the asm would help for these cases.
63
- For this instance moved setting epc1 closer to where it was used.
64
- Edit. "&" on output register would have resolved the problem.
65
- Refactored to reduce and consolidate register usage.
68
+ In adapting the public domain version, a crash would come or go away with
69
+ the slightest unrelated changes elsewhere in the function. Observed that
70
+ register a15 was used for epc1, then clobbered by `rsr`. I now believe a
71
+ "&" on the output register would have resolved the problem.
72
+
73
+ However, I have refactored the Extended ASM to reduce and consolidate
74
+ register usage and corrected the issue.
75
+
76
+ The positioning of the Extended ASM block (as early as possible in the
77
+ compiled function) is in part controlled by the immediate need for
78
+ output variable `insn`. This placement aids in getting excvaddr read as
79
+ early as possible.
66
80
*/
67
- uint32_t insn;
68
- __asm (
69
- " movi %0, ~3\n\t " /* prepare a mask for the EPC */
70
- " and %0, %0, %1\n\t " /* apply mask for 32bit aligned base */
71
- " ssa8l %1\n\t " /* set up shift register for src op */
72
- " l32i %1, %0, 0\n\t " /* load part 1 */
73
- " l32i %0, %0, 4\n\t " /* load part 2 */
74
- " src %0, %0, %1\n\t " /* right shift to get faulting instruction */
75
- :" =&r" (insn)
76
- :" r" (ef->epc )
77
- :
78
- );
81
+ uint32_t insn, excvaddr;
82
+ #if 1
83
+ {
84
+ uint32_t tmp;
85
+ __asm__ (
86
+ " rsr.excvaddr %[vaddr]\n\t " /* Read faulting address as early as possible */
87
+ " movi.n %[tmp], ~3\n\t " /* prepare a mask for the EPC */
88
+ " and %[tmp], %[tmp], %[epc]\n\t " /* apply mask for 32-bit aligned base */
89
+ " ssa8l %[epc]\n\t " /* set up shift register for src op */
90
+ " l32i %[insn], %[tmp], 0\n\t " /* load part 1 */
91
+ " l32i %[tmp], %[tmp], 4\n\t " /* load part 2 */
92
+ " src %[insn], %[tmp], %[insn]\n\t " /* right shift to get faulting instruction */
93
+ : [vaddr]" =&r" (excvaddr), [insn]" =&r" (insn), [tmp]" =&r" (tmp)
94
+ : [epc]" r" (ef->epc ) :);
95
+ }
96
+
97
+ #else
98
+ {
99
+ __asm__ __volatile__ ("rsr.excvaddr %0;" : "=r"(excvaddr):: "memory");
100
+ /*
101
+ "C" reference code for the ASM to document intent.
102
+ May also prove useful when isolating possible issues with Extended ASM,
103
+ optimizations, new compilers, etc.
104
+ */
105
+ uint32_t epc = ef->epc;
106
+ uint32_t *pWord = (uint32_t *)(epc & ~3);
107
+ uint64_t big_word = ((uint64_t)pWord[1] << 32) | pWord[0];
108
+ uint32_t pos = (epc & 3) * 8;
109
+ insn = (uint32_t)(big_word >>= pos);
110
+ }
111
+ #endif
79
112
80
113
uint32_t what = insn & LOAD_MASK;
81
114
uint32_t valmask = 0 ;
@@ -102,10 +135,6 @@ static IRAM_ATTR void non32xfer_exception_handler(struct __exception_frame *ef,
102
135
--regno; /* account for skipped a1 in exception_frame */
103
136
}
104
137
105
- uint32_t excvaddr;
106
- /* read out the faulting address */
107
- __asm (" rsr %0, EXCVADDR;" :" =r" (excvaddr)::);
108
-
109
138
#ifdef DEBUG_ESP_MMU
110
139
/* debug option to validate address so we don't hide memory access bugs in APP */
111
140
if (mmu_is_iram ((void *)excvaddr) || (is_read && mmu_is_icache ((void *)excvaddr))) {
@@ -114,31 +143,34 @@ static IRAM_ATTR void non32xfer_exception_handler(struct __exception_frame *ef,
114
143
continue ; /* fail */
115
144
}
116
145
#endif
117
-
118
- if (is_read) {
119
- /* Load, shift and mask down to correct size */
120
- uint32_t val = (*(uint32_t *)(excvaddr & ~0x3 ));
121
- val >>= (excvaddr & 0x3 ) * 8 ;
122
- val &= valmask;
123
-
124
- /* Sign-extend for L16SI, if applicable */
125
- if (what == L16SI_MATCH && (val & 0x8000 )) {
126
- val |= 0xffff0000 ;
146
+ {
147
+ uint32_t *pWord = (uint32_t *)(excvaddr & ~0x3 );
148
+ uint32_t pos = (excvaddr & 0x3 ) * 8 ;
149
+ uint32_t mem_val = *pWord;
150
+
151
+ if (is_read) {
152
+ /* shift and mask down to correct size */
153
+ mem_val >>= pos;
154
+ mem_val &= valmask;
155
+
156
+ /* Sign-extend for L16SI, if applicable */
157
+ if (what == L16SI_MATCH && (mem_val & 0x8000 )) {
158
+ mem_val |= 0xffff0000 ;
159
+ }
160
+
161
+ ef->a_reg [regno] = mem_val; /* carry out the load */
162
+
163
+ } else { /* is write */
164
+ uint32_t val = ef->a_reg [regno]; /* get value to store from register */
165
+ val <<= pos;
166
+ valmask <<= pos;
167
+ val &= valmask;
168
+
169
+ /* mask out field, and merge */
170
+ mem_val &= (~valmask);
171
+ mem_val |= val;
172
+ *pWord = mem_val; /* carry out the store */
127
173
}
128
-
129
- ef->a_reg [regno] = val; /* carry out the load */
130
-
131
- } else { /* is write */
132
- uint32_t val = ef->a_reg [regno]; /* get value to store from register */
133
- val <<= (excvaddr & 0x3 ) * 8 ;
134
- valmask <<= (excvaddr & 0x3 ) * 8 ;
135
- val &= valmask;
136
-
137
- /* Load, mask out field, and merge */
138
- uint32_t dst_val = (*(uint32_t *)(excvaddr & ~0x3 ));
139
- dst_val &= (~valmask);
140
- dst_val |= val;
141
- (*(uint32_t *)(excvaddr & ~0x3 )) = dst_val; /* carry out the store */
142
174
}
143
175
144
176
ef->epc += 3 ; /* resume at following instruction */
@@ -201,6 +233,7 @@ static void _set_exception_handler_wrapper(int cause) {
201
233
}
202
234
}
203
235
236
+ void install_non32xfer_exception_handler (void ) __attribute__((weak));
204
237
void install_non32xfer_exception_handler (void ) {
205
238
if (NULL == old_c_handler) {
206
239
// Set the "C" exception handler the wrapper will call
0 commit comments